//===-- PPCISelLowering.cpp - PPC DAG Lowering Implementation -------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file implements the PPCISelLowering class.
//
//===----------------------------------------------------------------------===//

#include "PPCISelLowering.h"
#include "PPC.h"
#include "PPCCCState.h"
#include "PPCCallingConv.h"
#include "PPCFrameLowering.h"
#include "PPCInstrInfo.h"
#include "PPCPerfectShuffle.h"
#include "PPCRegisterInfo.h"
#include "PPCSubtarget.h"
#include "PPCTargetMachine.h"
#include "llvm/ADT/APFloat.h"
#include "llvm/ADT/APInt.h"
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/None.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallSet.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/IR/CallingConv.h"
#include "llvm/IR/Constant.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/DebugLoc.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/GlobalValue.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/Intrinsics.h"
#include "llvm/IR/IntrinsicsPowerPC.h"
#include "llvm/IR/Module.h"
#include "llvm/IR/Type.h"
#include "llvm/IR/Use.h"
#include "llvm/IR/Value.h"
#include "llvm/MC/MCContext.h"
#include "llvm/MC/MCExpr.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/Format.h"
#include <algorithm>
#include <cassert>
#include <cstdint>
#include <iterator>
#include <list>
#include <utility>
#include <vector>

using namespace llvm;

#define DEBUG_TYPE "ppc-lowering"

static cl::opt<bool> DisablePPCPreinc("disable-ppc-preinc",
    cl::desc("disable preincrement load/store generation on PPC"), cl::Hidden);

static cl::opt<bool> DisableILPPref("disable-ppc-ilp-pref",
    cl::desc("disable setting the node scheduling preference to ILP on PPC"),
    cl::Hidden);

static cl::opt<bool> DisablePPCUnaligned("disable-ppc-unaligned",
    cl::desc("disable unaligned load/store generation on PPC"), cl::Hidden);

static cl::opt<bool> DisableSCO("disable-ppc-sco",
    cl::desc("disable sibling call optimization on ppc"), cl::Hidden);

static cl::opt<bool>
    DisableInnermostLoopAlign32("disable-ppc-innermost-loop-align32",
    cl::desc("don't always align innermost loop to 32 bytes on ppc"),
    cl::Hidden);

static cl::opt<bool> UseAbsoluteJumpTables("ppc-use-absolute-jumptables",
    cl::desc("use absolute jump tables on ppc"), cl::Hidden);

// TODO: Remove this option once soft fp128 is fully supported.
static cl::opt<bool>
    EnableSoftFP128("enable-soft-fp128",
                    cl::desc("temp option to enable soft fp128"), cl::Hidden);

STATISTIC(NumTailCalls, "Number of tail calls");
STATISTIC(NumSiblingCalls, "Number of sibling calls");
STATISTIC(ShufflesHandledWithVPERM, "Number of shuffles lowered to a VPERM");
STATISTIC(NumDynamicAllocaProbed, "Number of dynamic stack allocations probed");

static bool isNByteElemShuffleMask(ShuffleVectorSDNode *, unsigned, int);

static SDValue widenVec(SelectionDAG &DAG, SDValue Vec, const SDLoc &dl);
// FIXME: Remove this once the bug has been fixed!
extern cl::opt<bool> ANDIGlueBug;

PPCTargetLowering::PPCTargetLowering(const PPCTargetMachine &TM,
                                     const PPCSubtarget &STI)
    : TargetLowering(TM), Subtarget(STI) {
  // On PPC32/64, arguments smaller than 4/8 bytes are extended, so all
  // arguments are at least 4/8 bytes aligned.
  bool isPPC64 = Subtarget.isPPC64();
  setMinStackArgumentAlignment(isPPC64 ? Align(8) : Align(4));

  // Set up the register classes.
  addRegisterClass(MVT::i32, &PPC::GPRCRegClass);
  if (!useSoftFloat()) {
    if (hasSPE()) {
      addRegisterClass(MVT::f32, &PPC::GPRCRegClass);
      // The EFPU2 APU only supports f32.
      if (!Subtarget.hasEFPU2())
        addRegisterClass(MVT::f64, &PPC::SPERCRegClass);
    } else {
      addRegisterClass(MVT::f32, &PPC::F4RCRegClass);
      addRegisterClass(MVT::f64, &PPC::F8RCRegClass);
    }
  }

  // Match BITREVERSE to customized fast code sequence in the td file.

  // Sub-word ATOMIC_CMP_SWAP needs to ensure that the input is zero-extended.

  // Custom lower inline assembly to check for special registers.

  // PowerPC has an i16 but no i8 (or i1) SEXTLOAD.
  for (MVT VT : MVT::integer_valuetypes()) {
  }

  if (Subtarget.isISA3_0()) {
  } else {
    // No extending loads from f16 or HW conversions back and forth.
  }


  // PowerPC has pre-inc loads and stores.
  if (!Subtarget.hasSPE()) {
  }

  // PowerPC uses ADDC/ADDE/SUBC/SUBE to propagate carry.
  const MVT ScalarIntVTs[] = { MVT::i32, MVT::i64 };
  for (MVT VT : ScalarIntVTs) {
  }

  if (Subtarget.useCRBits()) {

    if (isPPC64 || Subtarget.hasFPCVT()) {
                        isPPC64 ? MVT::i64 : MVT::i32);
                        isPPC64 ? MVT::i64 : MVT::i32);

                        isPPC64 ? MVT::i64 : MVT::i32);
                        isPPC64 ? MVT::i64 : MVT::i32);

                        isPPC64 ? MVT::i64 : MVT::i32);
                        isPPC64 ? MVT::i64 : MVT::i32);

                        isPPC64 ? MVT::i64 : MVT::i32);
                        isPPC64 ? MVT::i64 : MVT::i32);
    } else {
    }

    // PowerPC does not support direct load/store of condition registers.

    // FIXME: Remove this once the ANDI glue bug is fixed:
    if (ANDIGlueBug)

    for (MVT VT : MVT::integer_valuetypes()) {
    }

    addRegisterClass(MVT::i1, &PPC::CRBITRCRegClass);
  }

  // Expand ppcf128 to i32 by hand for the benefit of llvm-gcc bootstrap on
  // PPC (the libcall is not available).

  // We do not currently implement these libm ops for PowerPC.

  // PowerPC has no SREM/UREM instructions unless we are on P9.
  // On P9 we may use a hardware instruction to compute the remainder.
  // When the result of both the remainder and the division is required it is
  // more efficient to compute the remainder from the result of the division
  // rather than use the remainder instruction. The instructions are legalized
  // directly because the DivRemPairsPass performs the transformation at the IR
  // level.
  if (Subtarget.isISA3_0()) {
  } else {
  }

  // Don't use SMUL_LOHI/UMUL_LOHI or SDIVREM/UDIVREM to lower SREM/UREM.

  // Handle constrained floating-point operations of scalar.
  // TODO: Handle SPE specific operation.

  if (Subtarget.hasVSX()) {
  }

  if (Subtarget.hasFSQRT()) {
  }

  if (Subtarget.hasFPRND()) {

  }

  // We don't support sin/cos/sqrt/fmod/pow.
  if (Subtarget.hasSPE()) {
  } else {
  }

  if (Subtarget.hasSPE())


  // If we're enabling GP optimizations, use hardware square root.
  if (!Subtarget.hasFSQRT() &&
      !(TM.Options.UnsafeFPMath && Subtarget.hasFRSQRTE() &&
        Subtarget.hasFRE()))

  if (!Subtarget.hasFSQRT() &&
      !(TM.Options.UnsafeFPMath && Subtarget.hasFRSQRTES() &&
        Subtarget.hasFRES()))

  if (Subtarget.hasFCPSGN()) {
  } else {
  }

  if (Subtarget.hasFPRND()) {

  }

  // PowerPC does not have BSWAP, but we can use the vector BSWAP instruction
  // xxbrd to speed up scalar BSWAP64.
  // CTPOP and CTTZ were introduced in P8 and P9, respectively.
  if (Subtarget.hasP9Vector())
  else
  if (Subtarget.isISA3_0()) {
  } else {
  }

  if (Subtarget.hasPOPCNTD() == PPCSubtarget::POPCNTD_Fast) {
  } else {
  }

  // PowerPC does not have ROTR.

  if (!Subtarget.useCRBits()) {
    // PowerPC does not have Select.
  }

  // PowerPC wants to turn select_cc of FP into fsel when possible.

  // PowerPC wants to optimize integer setcc a bit.
  if (!Subtarget.useCRBits())

  if (Subtarget.hasFPU()) {

  }

  // PowerPC does not have BRCOND, which requires SetCC.
  if (!Subtarget.useCRBits())


  if (Subtarget.hasSPE()) {
    // SPE has built-in conversions.
  } else {
    // PowerPC turns FP_TO_SINT into FCTIWZ and some load/stores.

    // PowerPC does not have [U|S]INT_TO_FP.
  }

  if (Subtarget.hasDirectMove() && isPPC64) {
    if (TM.Options.UnsafeFPMath) {
    }
  } else {
  }

  // We cannot sextinreg(i1). Expand to shifts.

  // NOTE: EH_SJLJ_SETJMP/_LONGJMP supported here is NOT intended to support
  // SjLj exception handling but a light-weight setjmp/longjmp replacement to
  // support continuations, user-level threading, and so on. As a result, no
  // other SjLj exception interfaces are implemented; please don't build
  // your own exception handling based on them.
  // LLVM/Clang supports zero-cost DWARF exception handling.

  // We want to legalize GlobalAddress and ConstantPool nodes into the
  // appropriate instructions to materialize the address.

  // TRAP is legal.

  // TRAMPOLINE is custom lowered.

  // VASTART needs to be custom lowered to use the VarArgsFrameIndex.

  if (Subtarget.is64BitELFABI()) {
    // VAARG always uses double-word chunks, so promote anything smaller.
  } else if (Subtarget.is32BitELFABI()) {
    // VAARG is custom lowered with the 32-bit SVR4 ABI.
  } else

  // VACOPY is custom lowered with the 32-bit SVR4 ABI.
  if (Subtarget.is32BitELFABI())
  else

  // Use the default implementation.

  // We want to custom lower some of our intrinsics.

  // To handle counter-based loop conditions.


  // Comparisons that require checking two conditions.
  if (Subtarget.hasSPE()) {
  }


  if (Subtarget.has64BitSupport()) {
    // They also have instructions for converting between i64 and fp.
    // This is just the low 32 bits of a (signed) fp->i64 conversion.
    // We cannot do this with Promote because i64 is not a legal type.

    if (Subtarget.hasLFIWAX() || Subtarget.isPPC64()) {
    }
  } else {
    // PowerPC does not have FP_TO_UINT on 32-bit implementations.
    if (Subtarget.hasSPE()) {
    } else {
    }
  }

  // With the instructions enabled under FPCVT, we can do everything.
  if (Subtarget.hasFPCVT()) {
    if (Subtarget.has64BitSupport()) {
    }

  }

  if (Subtarget.use64BitRegs()) {
    // 64-bit PowerPC implementations can support i64 types directly.
    addRegisterClass(MVT::i64, &PPC::G8RCRegClass);
    // BUILD_PAIR can't be handled natively, and should be expanded to shl/or.
    // 64-bit PowerPC wants to expand i128 shifts itself.
  } else {
    // 32-bit PowerPC wants to expand i64 shifts itself.
  }

  // PowerPC has better expansions for funnel shifts than the generic
  // TargetLowering::expandFunnelShift.
  if (Subtarget.has64BitSupport()) {
  }

  if (Subtarget.hasVSX()) {
  }

  if (Subtarget.hasAltivec()) {
    for (MVT VT : { MVT::v16i8, MVT::v8i16, MVT::v4i32 }) {
    }
    // First set operation action for all vector types to expand. Then we
    // will selectively turn on ones that can be effectively codegen'd.
    for (MVT VT : MVT::fixedlen_vector_valuetypes()) {
      // add/sub are legal for all supported vector VT's.

      // For v2i64, these are only valid with P8Vector. This is corrected after
      // the loop.
      if (VT.getSizeInBits() <= 128 && VT.getScalarSizeInBits() <= 64) {
      } else {
      }

      if (Subtarget.hasVSX()) {
      }

      // Vector instructions introduced in P8.
      if (Subtarget.hasP8Altivec() && (VT.SimpleTy != MVT::v1i128)) {
      } else {
      }

      // Vector instructions introduced in P9.
      if (Subtarget.hasP9Altivec() && (VT.SimpleTy != MVT::v1i128))
      else

      // We promote all shuffles to v16i8.

      // We promote all non-typed operations to v4i32.

      // No other operations are legal.

      for (MVT InnerVT : MVT::fixedlen_vector_valuetypes()) {
        setTruncStoreAction(VT, InnerVT, Expand);
        setLoadExtAction(ISD::EXTLOAD, VT, InnerVT, Expand);
      }
    }

    if (!Subtarget.hasP8Vector()) {
    }

    // We can custom expand all VECTOR_SHUFFLEs to VPERM, others we can handle
    // with merges, splats, etc.

    // Vector truncates to sub-word integer that fit in an Altivec/VSX register
    // are cheap, so handle them before they get expanded to scalar.

                       Subtarget.useCRBits() ? Legal : Expand);

    // Custom lowering ROTL v1i128 to VECTOR_SHUFFLE v16i8.
    // With hasAltivec set, we can lower ISD::ROTL to vrl(b|h|w).
    if (Subtarget.hasAltivec())
      for (auto VT : {MVT::v4i32, MVT::v8i16, MVT::v16i8})
    // With hasP8Altivec set, we can lower ISD::ROTL to vrld.
    if (Subtarget.hasP8Altivec())

    addRegisterClass(MVT::v4f32, &PPC::VRRCRegClass);
    addRegisterClass(MVT::v4i32, &PPC::VRRCRegClass);
    addRegisterClass(MVT::v8i16, &PPC::VRRCRegClass);
    addRegisterClass(MVT::v16i8, &PPC::VRRCRegClass);


    if (Subtarget.hasVSX()) {
    }

    if (Subtarget.hasP8Altivec())
    else

    if (Subtarget.isISA3_1()) {
    }




    // Altivec does not contain unordered floating-point compare instructions.

    if (Subtarget.hasVSX()) {
      if (Subtarget.hasP8Vector()) {
      }
      if (Subtarget.hasDirectMove() && isPPC64) {
      }

      // The nearbyint variants are not allowed to raise the inexact exception
      // so we can only code-gen them with unsafe math.
      if (TM.Options.UnsafeFPMath) {
      }





      // Share the Altivec comparison restrictions.



      if (Subtarget.hasP8Vector())
        addRegisterClass(MVT::f32, &PPC::VSSRCRegClass);

      addRegisterClass(MVT::f64, &PPC::VSFRCRegClass);

      addRegisterClass(MVT::v4i32, &PPC::VSRCRegClass);
      addRegisterClass(MVT::v4f32, &PPC::VSRCRegClass);
      addRegisterClass(MVT::v2f64, &PPC::VSRCRegClass);

      if (Subtarget.hasP8Altivec()) {

        // 128 bit shifts can be accomplished via 3 instructions for SHL and
        // SRL, but not for SRA because of the instructions available:
        // VS{RL} and VS{RL}O. However, due to direct move costs, it's not
        // worth doing.

      } else {


        // VSX v2i64 only supports non-arithmetic operations.
      }

      if (Subtarget.isISA3_1())
      else




      // Custom handling for partial vectors of integers converted to
      // floating point. We already have optimal handling for v2i32 through
      // the DAG combine, so those aren't necessary.


      if (Subtarget.hasDirectMove())

      // Handle constrained floating-point operations of vector.
      // The predicate is `hasVSX` because Altivec instructions have no
      // exception behavior, while VSX vector instructions do.


      addRegisterClass(MVT::v2i64, &PPC::VSRCRegClass);
    }

    if (Subtarget.hasP8Altivec()) {
      addRegisterClass(MVT::v2i64, &PPC::VRRCRegClass);
      addRegisterClass(MVT::v1i128, &PPC::VRRCRegClass);
    }

    if (Subtarget.hasP9Vector()) {

      // 128 bit shifts can be accomplished via 3 instructions for SHL and
      // SRL, but not for SRA because of the instructions available:
      // VS{RL} and VS{RL}O.

      addRegisterClass(MVT::f128, &PPC::VRRCRegClass);
      // No extending loads to f128 on PPC.
      for (MVT FPT : MVT::fp_valuetypes())


      // No implementation for these ops for PowerPC.

      // Handle constrained floating-point operations of fp128.
    } else if (Subtarget.hasAltivec() && EnableSoftFP128) {
      addRegisterClass(MVT::f128, &PPC::VRRCRegClass);

      for (MVT FPT : MVT::fp_valuetypes())



      // Set FADD/FSUB as libcall to keep the legalizer from expanding the
      // fp_to_uint and int_to_fp.



      // Expand the fp_extend if the target type is fp128.

      // Expand the fp_round if the source type is fp128.
      for (MVT VT : {MVT::f32, MVT::f64}) {
      }
    }

    if (Subtarget.hasP9Altivec()) {

    }
  }

  if (Subtarget.pairedVectorMemops()) {
    addRegisterClass(MVT::v256i1, &PPC::VSRpRCRegClass);
  }
  if (Subtarget.hasMMA()) {
    addRegisterClass(MVT::v512i1, &PPC::UACCRCRegClass);
  }

  if (Subtarget.has64BitSupport())

  if (Subtarget.isISA3_1())


  if (!isPPC64) {
  }


  if (Subtarget.hasAltivec()) {
    // Altivec instructions set fields to all zeros or all ones.
  }

  if (!isPPC64) {
    // These libcalls are not available in 32-bit.
    setLibcallName(RTLIB::SHL_I128, nullptr);
    setLibcallName(RTLIB::SRL_I128, nullptr);
    setLibcallName(RTLIB::SRA_I128, nullptr);
  }

  if (!isPPC64)

  setStackPointerRegisterToSaveRestore(isPPC64 ? PPC::X1 : PPC::R1);

  // We have target-specific dag combine patterns for the following nodes:
  if (Subtarget.hasFPCVT())
  if (Subtarget.useCRBits())




  if (Subtarget.useCRBits()) {
  }

  if (Subtarget.hasP9Altivec()) {
  }

  setLibcallName(RTLIB::LOG_F128, "logf128");
  setLibcallName(RTLIB::LOG2_F128, "log2f128");
  setLibcallName(RTLIB::LOG10_F128, "log10f128");
  setLibcallName(RTLIB::EXP_F128, "expf128");
  setLibcallName(RTLIB::EXP2_F128, "exp2f128");
  setLibcallName(RTLIB::SIN_F128, "sinf128");
  setLibcallName(RTLIB::COS_F128, "cosf128");
  setLibcallName(RTLIB::POW_F128, "powf128");
  setLibcallName(RTLIB::FMIN_F128, "fminf128");
  setLibcallName(RTLIB::FMAX_F128, "fmaxf128");
  setLibcallName(RTLIB::REM_F128, "fmodf128");
  setLibcallName(RTLIB::SQRT_F128, "sqrtf128");
  setLibcallName(RTLIB::CEIL_F128, "ceilf128");
  setLibcallName(RTLIB::FLOOR_F128, "floorf128");
  setLibcallName(RTLIB::TRUNC_F128, "truncf128");
  setLibcallName(RTLIB::ROUND_F128, "roundf128");
  setLibcallName(RTLIB::LROUND_F128, "lroundf128");
  setLibcallName(RTLIB::LLROUND_F128, "llroundf128");
  setLibcallName(RTLIB::RINT_F128, "rintf128");
  setLibcallName(RTLIB::LRINT_F128, "lrintf128");
  setLibcallName(RTLIB::LLRINT_F128, "llrintf128");
  setLibcallName(RTLIB::NEARBYINT_F128, "nearbyintf128");
  setLibcallName(RTLIB::FMA_F128, "fmaf128");

  // With 32 condition bits, we don't need to sink (and duplicate) compares
  // aggressively in CodeGenPrep.
  if (Subtarget.useCRBits()) {
  }

  switch (Subtarget.getCPUDirective()) {
  default: break;
  case PPC::DIR_970:
  case PPC::DIR_A2:
  case PPC::DIR_E500:
  case PPC::DIR_E500mc:
  case PPC::DIR_E5500:
  case PPC::DIR_PWR4:
  case PPC::DIR_PWR5:
  case PPC::DIR_PWR5X:
  case PPC::DIR_PWR6:
  case PPC::DIR_PWR6X:
  case PPC::DIR_PWR7:
  case PPC::DIR_PWR8:
  case PPC::DIR_PWR9:
  case PPC::DIR_PWR10:
    break;
  }

  if (Subtarget.enableMachineScheduler())
  else


  // The Freescale cores do better with aggressive inlining of memcpy and
  // friends. GCC uses the same threshold of 128 bytes (= 32 word stores).
  if (Subtarget.getCPUDirective() == PPC::DIR_E500mc ||
      Subtarget.getCPUDirective() == PPC::DIR_E5500) {
    MaxStoresPerMemset = 32;
    MaxStoresPerMemcpy = 32;
  } else if (Subtarget.getCPUDirective() == PPC::DIR_A2) {
    // The A2 also benefits from (very) aggressive inlining of memcpy and
    // friends. The overhead of the function call, even when warm, can be
    // over one hundred cycles.
    MaxStoresPerMemset = 128;
    MaxStoresPerMemcpy = 128;
    MaxStoresPerMemmove = 128;
    MaxLoadsPerMemcmp = 128;
  } else {
  }

  IsStrictFPEnabled = true;

  // Let the subtarget (CPU) decide if a predictable select is more expensive
  // than the corresponding branch. This information is used in CGP to decide
  // when to convert selects into branches.
}

/// getMaxByValAlign - Helper for getByValTypeAlignment to determine
/// the desired ByVal argument alignment.
static void getMaxByValAlign(Type *Ty, Align &MaxAlign, Align MaxMaxAlign) {
  if (MaxAlign == MaxMaxAlign)
    return;
  if (VectorType *VTy = dyn_cast<VectorType>(Ty)) {
    if (MaxMaxAlign >= 32 &&
        VTy->getPrimitiveSizeInBits().getFixedSize() >= 256)
      MaxAlign = Align(32);
    else if (VTy->getPrimitiveSizeInBits().getFixedSize() >= 128 &&
             MaxAlign < 16)
      MaxAlign = Align(16);
  } else if (ArrayType *ATy = dyn_cast<ArrayType>(Ty)) {
    Align EltAlign;
    getMaxByValAlign(ATy->getElementType(), EltAlign, MaxMaxAlign);
    if (EltAlign > MaxAlign)
      MaxAlign = EltAlign;
  } else if (StructType *STy = dyn_cast<StructType>(Ty)) {
    for (auto *EltTy : STy->elements()) {
      Align EltAlign;
      getMaxByValAlign(EltTy, EltAlign, MaxMaxAlign);
      if (EltAlign > MaxAlign)
        MaxAlign = EltAlign;
      if (MaxAlign == MaxMaxAlign)
        break;
    }
  }
}
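
// Illustrative walk-through (editorial note, not part of the original
// source): with Altivec available, getByValTypeAlignment below calls this
// helper with MaxMaxAlign = 16. For a type such as struct { int i; <4 x i32>
// v; }, the recursion reaches the 128-bit vector member and raises MaxAlign
// to 16; Align(32) would require a 256-bit vector and MaxMaxAlign >= 32.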

/// getByValTypeAlignment - Return the desired alignment for ByVal aggregate
/// function arguments in the caller parameter area.
unsigned PPCTargetLowering::getByValTypeAlignment(Type *Ty,
                                                  const DataLayout &DL) const {
  // 16-byte and wider vectors are passed on a 16-byte boundary.
  // The rest are aligned to an 8-byte boundary on PPC64 and a 4-byte
  // boundary on PPC32.
  Align Alignment = Subtarget.isPPC64() ? Align(8) : Align(4);
  if (Subtarget.hasAltivec())
    getMaxByValAlign(Ty, Alignment, Align(16));
  return Alignment.value();
}

bool PPCTargetLowering::useSoftFloat() const {
  return Subtarget.useSoftFloat();
}

bool PPCTargetLowering::hasSPE() const {
  return Subtarget.hasSPE();
}

bool PPCTargetLowering::preferIncOfAddToSubOfNot(EVT VT) const {
  return VT.isScalarInteger();
}

const char *PPCTargetLowering::getTargetNodeName(unsigned Opcode) const {
  switch ((PPCISD::NodeType)Opcode) {
  case PPCISD::FIRST_NUMBER: break;
  case PPCISD::FSEL: return "PPCISD::FSEL";
  case PPCISD::XSMAXCDP: return "PPCISD::XSMAXCDP";
  case PPCISD::XSMINCDP: return "PPCISD::XSMINCDP";
  case PPCISD::FCFID: return "PPCISD::FCFID";
  case PPCISD::FCFIDU: return "PPCISD::FCFIDU";
  case PPCISD::FCFIDS: return "PPCISD::FCFIDS";
  case PPCISD::FCFIDUS: return "PPCISD::FCFIDUS";
  case PPCISD::FCTIDZ: return "PPCISD::FCTIDZ";
  case PPCISD::FCTIWZ: return "PPCISD::FCTIWZ";
  case PPCISD::FCTIDUZ: return "PPCISD::FCTIDUZ";
  case PPCISD::FCTIWUZ: return "PPCISD::FCTIWUZ";
  case PPCISD::FP_TO_UINT_IN_VSR:
    return "PPCISD::FP_TO_UINT_IN_VSR";
  case PPCISD::FP_TO_SINT_IN_VSR:
    return "PPCISD::FP_TO_SINT_IN_VSR";
  case PPCISD::FRE: return "PPCISD::FRE";
  case PPCISD::FRSQRTE: return "PPCISD::FRSQRTE";
  case PPCISD::FTSQRT:
    return "PPCISD::FTSQRT";
  case PPCISD::FSQRT:
    return "PPCISD::FSQRT";
  case PPCISD::STFIWX: return "PPCISD::STFIWX";
  case PPCISD::VPERM: return "PPCISD::VPERM";
  case PPCISD::XXSPLT: return "PPCISD::XXSPLT";
  case PPCISD::XXSPLTI_SP_TO_DP:
    return "PPCISD::XXSPLTI_SP_TO_DP";
  case PPCISD::XXSPLTI32DX:
    return "PPCISD::XXSPLTI32DX";
  case PPCISD::VECINSERT: return "PPCISD::VECINSERT";
  case PPCISD::XXPERMDI: return "PPCISD::XXPERMDI";
  case PPCISD::VECSHL: return "PPCISD::VECSHL";
  case PPCISD::CMPB: return "PPCISD::CMPB";
  case PPCISD::Hi: return "PPCISD::Hi";
  case PPCISD::Lo: return "PPCISD::Lo";
  case PPCISD::TOC_ENTRY: return "PPCISD::TOC_ENTRY";
  case PPCISD::ATOMIC_CMP_SWAP_8: return "PPCISD::ATOMIC_CMP_SWAP_8";
  case PPCISD::ATOMIC_CMP_SWAP_16: return "PPCISD::ATOMIC_CMP_SWAP_16";
  case PPCISD::DYNALLOC: return "PPCISD::DYNALLOC";
  case PPCISD::DYNAREAOFFSET: return "PPCISD::DYNAREAOFFSET";
  case PPCISD::PROBED_ALLOCA: return "PPCISD::PROBED_ALLOCA";
  case PPCISD::GlobalBaseReg: return "PPCISD::GlobalBaseReg";
  case PPCISD::SRL: return "PPCISD::SRL";
  case PPCISD::SRA: return "PPCISD::SRA";
  case PPCISD::SHL: return "PPCISD::SHL";
  case PPCISD::SRA_ADDZE: return "PPCISD::SRA_ADDZE";
  case PPCISD::CALL: return "PPCISD::CALL";
  case PPCISD::CALL_NOP: return "PPCISD::CALL_NOP";
  case PPCISD::CALL_NOTOC: return "PPCISD::CALL_NOTOC";
  case PPCISD::MTCTR: return "PPCISD::MTCTR";
  case PPCISD::BCTRL: return "PPCISD::BCTRL";
  case PPCISD::BCTRL_LOAD_TOC: return "PPCISD::BCTRL_LOAD_TOC";
  case PPCISD::RET_FLAG: return "PPCISD::RET_FLAG";
  case PPCISD::READ_TIME_BASE: return "PPCISD::READ_TIME_BASE";
  case PPCISD::EH_SJLJ_SETJMP: return "PPCISD::EH_SJLJ_SETJMP";
  case PPCISD::EH_SJLJ_LONGJMP: return "PPCISD::EH_SJLJ_LONGJMP";
  case PPCISD::MFOCRF: return "PPCISD::MFOCRF";
  case PPCISD::MFVSR: return "PPCISD::MFVSR";
  case PPCISD::MTVSRA: return "PPCISD::MTVSRA";
  case PPCISD::MTVSRZ: return "PPCISD::MTVSRZ";
  case PPCISD::SINT_VEC_TO_FP: return "PPCISD::SINT_VEC_TO_FP";
  case PPCISD::UINT_VEC_TO_FP: return "PPCISD::UINT_VEC_TO_FP";
  case PPCISD::SCALAR_TO_VECTOR_PERMUTED:
    return "PPCISD::SCALAR_TO_VECTOR_PERMUTED";
  case PPCISD::ANDI_rec_1_EQ_BIT:
    return "PPCISD::ANDI_rec_1_EQ_BIT";
  case PPCISD::ANDI_rec_1_GT_BIT:
    return "PPCISD::ANDI_rec_1_GT_BIT";
  case PPCISD::VCMP: return "PPCISD::VCMP";
  case PPCISD::VCMP_rec: return "PPCISD::VCMP_rec";
  case PPCISD::LBRX: return "PPCISD::LBRX";
  case PPCISD::STBRX: return "PPCISD::STBRX";
  case PPCISD::LFIWAX: return "PPCISD::LFIWAX";
  case PPCISD::LFIWZX: return "PPCISD::LFIWZX";
  case PPCISD::LXSIZX: return "PPCISD::LXSIZX";
  case PPCISD::STXSIX: return "PPCISD::STXSIX";
  case PPCISD::VEXTS: return "PPCISD::VEXTS";
  case PPCISD::LXVD2X: return "PPCISD::LXVD2X";
  case PPCISD::STXVD2X: return "PPCISD::STXVD2X";
  case PPCISD::LOAD_VEC_BE: return "PPCISD::LOAD_VEC_BE";
  case PPCISD::STORE_VEC_BE: return "PPCISD::STORE_VEC_BE";
  case PPCISD::ST_VSR_SCAL_INT:
    return "PPCISD::ST_VSR_SCAL_INT";
  case PPCISD::COND_BRANCH: return "PPCISD::COND_BRANCH";
  case PPCISD::BDNZ: return "PPCISD::BDNZ";
  case PPCISD::BDZ: return "PPCISD::BDZ";
  case PPCISD::MFFS: return "PPCISD::MFFS";
  case PPCISD::FADDRTZ: return "PPCISD::FADDRTZ";
  case PPCISD::TC_RETURN: return "PPCISD::TC_RETURN";
  case PPCISD::CR6SET: return "PPCISD::CR6SET";
  case PPCISD::CR6UNSET: return "PPCISD::CR6UNSET";
  case PPCISD::PPC32_GOT: return "PPCISD::PPC32_GOT";
  case PPCISD::PPC32_PICGOT: return "PPCISD::PPC32_PICGOT";
  case PPCISD::ADDIS_GOT_TPREL_HA: return "PPCISD::ADDIS_GOT_TPREL_HA";
  case PPCISD::LD_GOT_TPREL_L: return "PPCISD::LD_GOT_TPREL_L";
  case PPCISD::ADD_TLS: return "PPCISD::ADD_TLS";
  case PPCISD::ADDIS_TLSGD_HA: return "PPCISD::ADDIS_TLSGD_HA";
  case PPCISD::ADDI_TLSGD_L: return "PPCISD::ADDI_TLSGD_L";
  case PPCISD::GET_TLS_ADDR: return "PPCISD::GET_TLS_ADDR";
  case PPCISD::ADDI_TLSGD_L_ADDR: return "PPCISD::ADDI_TLSGD_L_ADDR";
  case PPCISD::ADDIS_TLSLD_HA: return "PPCISD::ADDIS_TLSLD_HA";
  case PPCISD::ADDI_TLSLD_L: return "PPCISD::ADDI_TLSLD_L";
  case PPCISD::GET_TLSLD_ADDR: return "PPCISD::GET_TLSLD_ADDR";
  case PPCISD::ADDI_TLSLD_L_ADDR: return "PPCISD::ADDI_TLSLD_L_ADDR";
  case PPCISD::ADDIS_DTPREL_HA: return "PPCISD::ADDIS_DTPREL_HA";
  case PPCISD::ADDI_DTPREL_L: return "PPCISD::ADDI_DTPREL_L";
  case PPCISD::PADDI_DTPREL:
    return "PPCISD::PADDI_DTPREL";
  case PPCISD::VADD_SPLAT: return "PPCISD::VADD_SPLAT";
  case PPCISD::SC: return "PPCISD::SC";
  case PPCISD::CLRBHRB: return "PPCISD::CLRBHRB";
  case PPCISD::MFBHRBE: return "PPCISD::MFBHRBE";
  case PPCISD::RFEBB: return "PPCISD::RFEBB";
  case PPCISD::XXSWAPD: return "PPCISD::XXSWAPD";
  case PPCISD::SWAP_NO_CHAIN: return "PPCISD::SWAP_NO_CHAIN";
  case PPCISD::VABSD: return "PPCISD::VABSD";
  case PPCISD::BUILD_FP128: return "PPCISD::BUILD_FP128";
  case PPCISD::BUILD_SPE64: return "PPCISD::BUILD_SPE64";
  case PPCISD::EXTRACT_SPE: return "PPCISD::EXTRACT_SPE";
  case PPCISD::EXTSWSLI: return "PPCISD::EXTSWSLI";
  case PPCISD::LD_VSX_LH: return "PPCISD::LD_VSX_LH";
  case PPCISD::FP_EXTEND_HALF: return "PPCISD::FP_EXTEND_HALF";
  case PPCISD::MAT_PCREL_ADDR: return "PPCISD::MAT_PCREL_ADDR";
  case PPCISD::TLS_DYNAMIC_MAT_PCREL_ADDR:
    return "PPCISD::TLS_DYNAMIC_MAT_PCREL_ADDR";
  case PPCISD::TLS_LOCAL_EXEC_MAT_ADDR:
    return "PPCISD::TLS_LOCAL_EXEC_MAT_ADDR";
  case PPCISD::ACC_BUILD: return "PPCISD::ACC_BUILD";
  case PPCISD::PAIR_BUILD: return "PPCISD::PAIR_BUILD";
  case PPCISD::EXTRACT_VSX_REG: return "PPCISD::EXTRACT_VSX_REG";
  case PPCISD::XXMFACC: return "PPCISD::XXMFACC";
  case PPCISD::LD_SPLAT: return "PPCISD::LD_SPLAT";
  case PPCISD::FNMSUB: return "PPCISD::FNMSUB";
  case PPCISD::STRICT_FADDRTZ:
    return "PPCISD::STRICT_FADDRTZ";
  case PPCISD::STRICT_FCTIDZ:
    return "PPCISD::STRICT_FCTIDZ";
  case PPCISD::STRICT_FCTIWZ:
    return "PPCISD::STRICT_FCTIWZ";
  case PPCISD::STRICT_FCTIDUZ:
    return "PPCISD::STRICT_FCTIDUZ";
  case PPCISD::STRICT_FCTIWUZ:
    return "PPCISD::STRICT_FCTIWUZ";
  case PPCISD::STRICT_FCFID:
    return "PPCISD::STRICT_FCFID";
  case PPCISD::STRICT_FCFIDU:
    return "PPCISD::STRICT_FCFIDU";
  case PPCISD::STRICT_FCFIDS:
    return "PPCISD::STRICT_FCFIDS";
  case PPCISD::STRICT_FCFIDUS:
    return "PPCISD::STRICT_FCFIDUS";
  case PPCISD::LXVRZX: return "PPCISD::LXVRZX";
  }
  return nullptr;
}

EVT PPCTargetLowering::getSetCCResultType(const DataLayout &DL, LLVMContext &C,
                                          EVT VT) const {
  if (!VT.isVector())
    return Subtarget.useCRBits() ? MVT::i1 : MVT::i32;

  return VT.changeVectorElementTypeToInteger();
}

bool PPCTargetLowering::isFMAFasterThanFMulAndFAdd(const MachineFunction &MF,
                                                   EVT VT) const {
  assert(VT.isFloatingPoint() && "Non-floating-point FMA?");
  return true;
}

//===----------------------------------------------------------------------===//
// Node matching predicates, for use by the tblgen matching code.
//===----------------------------------------------------------------------===//

/// isFloatingPointZero - Return true if this is 0.0 or -0.0.
static bool isFloatingPointZero(SDValue Op) {
  if (ConstantFPSDNode *CFP = dyn_cast<ConstantFPSDNode>(Op))
    return CFP->getValueAPF().isZero();
  else if (ISD::isEXTLoad(Op.getNode()) || ISD::isNON_EXTLoad(Op.getNode())) {
    // Maybe this has already been legalized into the constant pool?
    if (ConstantPoolSDNode *CP = dyn_cast<ConstantPoolSDNode>(Op.getOperand(1)))
      if (const ConstantFP *CFP = dyn_cast<ConstantFP>(CP->getConstVal()))
        return CFP->getValueAPF().isZero();
  }
  return false;
}

/// isConstantOrUndef - Op is either an undef node or a ConstantSDNode. Return
/// true if Op is undef or if it matches the specified value.
static bool isConstantOrUndef(int Op, int Val) {
  return Op < 0 || Op == Val;
}

/// isVPKUHUMShuffleMask - Return true if this is the shuffle mask for a
/// VPKUHUM instruction.
/// The ShuffleKind distinguishes between big-endian operations with
/// two different inputs (0), either-endian operations with two identical
/// inputs (1), and little-endian operations with two different inputs (2).
/// For the latter, the input operands are swapped (see PPCInstrAltivec.td).
bool PPC::isVPKUHUMShuffleMask(ShuffleVectorSDNode *N, unsigned ShuffleKind,
                               SelectionDAG &DAG) {
  bool IsLE = DAG.getDataLayout().isLittleEndian();
  if (ShuffleKind == 0) {
    if (IsLE)
      return false;
    for (unsigned i = 0; i != 16; ++i)
      if (!isConstantOrUndef(N->getMaskElt(i), i*2+1))
        return false;
  } else if (ShuffleKind == 2) {
    if (!IsLE)
      return false;
    for (unsigned i = 0; i != 16; ++i)
      if (!isConstantOrUndef(N->getMaskElt(i), i*2))
        return false;
  } else if (ShuffleKind == 1) {
    unsigned j = IsLE ? 0 : 1;
    for (unsigned i = 0; i != 8; ++i)
      if (!isConstantOrUndef(N->getMaskElt(i), i*2+j) ||
          !isConstantOrUndef(N->getMaskElt(i+8), i*2+j))
        return false;
  }
  return true;
}
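
// Illustrative example (editorial note, not part of the original source):
// for the big-endian two-input form (ShuffleKind 0), the qualifying v16i8
// mask is <1,3,5,7,9,11,13,15,17,19,21,23,25,27,29,31>, i.e. the low-order
// byte of each halfword of the concatenated inputs, which is exactly what
// vpkuhum produces.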

/// isVPKUWUMShuffleMask - Return true if this is the shuffle mask for a
/// VPKUWUM instruction.
/// The ShuffleKind distinguishes between big-endian operations with
/// two different inputs (0), either-endian operations with two identical
/// inputs (1), and little-endian operations with two different inputs (2).
/// For the latter, the input operands are swapped (see PPCInstrAltivec.td).
bool PPC::isVPKUWUMShuffleMask(ShuffleVectorSDNode *N, unsigned ShuffleKind,
                               SelectionDAG &DAG) {
  bool IsLE = DAG.getDataLayout().isLittleEndian();
  if (ShuffleKind == 0) {
    if (IsLE)
      return false;
    for (unsigned i = 0; i != 16; i += 2)
      if (!isConstantOrUndef(N->getMaskElt(i  ), i*2+2) ||
          !isConstantOrUndef(N->getMaskElt(i+1), i*2+3))
        return false;
  } else if (ShuffleKind == 2) {
    if (!IsLE)
      return false;
    for (unsigned i = 0; i != 16; i += 2)
      if (!isConstantOrUndef(N->getMaskElt(i  ), i*2) ||
          !isConstantOrUndef(N->getMaskElt(i+1), i*2+1))
        return false;
  } else if (ShuffleKind == 1) {
    unsigned j = IsLE ? 0 : 2;
    for (unsigned i = 0; i != 8; i += 2)
      if (!isConstantOrUndef(N->getMaskElt(i  ), i*2+j)   ||
          !isConstantOrUndef(N->getMaskElt(i+1), i*2+j+1) ||
          !isConstantOrUndef(N->getMaskElt(i+8), i*2+j)   ||
          !isConstantOrUndef(N->getMaskElt(i+9), i*2+j+1))
        return false;
  }
  return true;
}

/// isVPKUDUMShuffleMask - Return true if this is the shuffle mask for a
/// VPKUDUM instruction, AND the VPKUDUM instruction exists for the
/// current subtarget.
///
/// The ShuffleKind distinguishes between big-endian operations with
/// two different inputs (0), either-endian operations with two identical
/// inputs (1), and little-endian operations with two different inputs (2).
/// For the latter, the input operands are swapped (see PPCInstrAltivec.td).
bool PPC::isVPKUDUMShuffleMask(ShuffleVectorSDNode *N, unsigned ShuffleKind,
                               SelectionDAG &DAG) {
  const PPCSubtarget& Subtarget =
      static_cast<const PPCSubtarget&>(DAG.getSubtarget());
  if (!Subtarget.hasP8Vector())
    return false;

  bool IsLE = DAG.getDataLayout().isLittleEndian();
  if (ShuffleKind == 0) {
    if (IsLE)
      return false;
    for (unsigned i = 0; i != 16; i += 4)
      if (!isConstantOrUndef(N->getMaskElt(i  ), i*2+4) ||
          !isConstantOrUndef(N->getMaskElt(i+1), i*2+5) ||
          !isConstantOrUndef(N->getMaskElt(i+2), i*2+6) ||
          !isConstantOrUndef(N->getMaskElt(i+3), i*2+7))
        return false;
  } else if (ShuffleKind == 2) {
    if (!IsLE)
      return false;
    for (unsigned i = 0; i != 16; i += 4)
      if (!isConstantOrUndef(N->getMaskElt(i  ), i*2)   ||
          !isConstantOrUndef(N->getMaskElt(i+1), i*2+1) ||
          !isConstantOrUndef(N->getMaskElt(i+2), i*2+2) ||
          !isConstantOrUndef(N->getMaskElt(i+3), i*2+3))
        return false;
  } else if (ShuffleKind == 1) {
    unsigned j = IsLE ? 0 : 4;
    for (unsigned i = 0; i != 8; i += 4)
      if (!isConstantOrUndef(N->getMaskElt(i  ),  i*2+j)   ||
          !isConstantOrUndef(N->getMaskElt(i+1),  i*2+j+1) ||
          !isConstantOrUndef(N->getMaskElt(i+2),  i*2+j+2) ||
          !isConstantOrUndef(N->getMaskElt(i+3),  i*2+j+3) ||
          !isConstantOrUndef(N->getMaskElt(i+8),  i*2+j)   ||
          !isConstantOrUndef(N->getMaskElt(i+9),  i*2+j+1) ||
          !isConstantOrUndef(N->getMaskElt(i+10), i*2+j+2) ||
          !isConstantOrUndef(N->getMaskElt(i+11), i*2+j+3))
        return false;
  }
  return true;
}

/// isVMerge - Common function, used to match vmrg* shuffles.
///
static bool isVMerge(ShuffleVectorSDNode *N, unsigned UnitSize,
                     unsigned LHSStart, unsigned RHSStart) {
  if (N->getValueType(0) != MVT::v16i8)
    return false;
  assert((UnitSize == 1 || UnitSize == 2 || UnitSize == 4) &&
         "Unsupported merge size!");

  for (unsigned i = 0; i != 8/UnitSize; ++i)     // Step over units
    for (unsigned j = 0; j != UnitSize; ++j) {   // Step over bytes within unit
      if (!isConstantOrUndef(N->getMaskElt(i*UnitSize*2+j),
                             LHSStart+j+i*UnitSize) ||
          !isConstantOrUndef(N->getMaskElt(i*UnitSize*2+UnitSize+j),
                             RHSStart+j+i*UnitSize))
        return false;
    }
  return true;
}
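
// Worked example (editorial note, not part of the original source): for a
// big-endian vmrglb with two different inputs, isVMRGLShuffleMask below
// calls isVMerge(N, 1, 8, 24), which accepts the v16i8 mask
// <8,24,9,25,10,26,11,27,12,28,13,29,14,30,15,31>: the low halves of the
// two inputs interleaved byte by byte.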

/// isVMRGLShuffleMask - Return true if this is a shuffle mask suitable for
/// a VMRGL* instruction with the specified unit size (1,2 or 4 bytes).
/// The ShuffleKind distinguishes between big-endian merges with two
/// different inputs (0), either-endian merges with two identical inputs (1),
/// and little-endian merges with two different inputs (2). For the latter,
/// the input operands are swapped (see PPCInstrAltivec.td).
bool PPC::isVMRGLShuffleMask(ShuffleVectorSDNode *N, unsigned UnitSize,
                             unsigned ShuffleKind, SelectionDAG &DAG) {
  if (DAG.getDataLayout().isLittleEndian()) {
    if (ShuffleKind == 1) // unary
      return isVMerge(N, UnitSize, 0, 0);
    else if (ShuffleKind == 2) // swapped
      return isVMerge(N, UnitSize, 0, 16);
    else
      return false;
  } else {
    if (ShuffleKind == 1) // unary
      return isVMerge(N, UnitSize, 8, 8);
    else if (ShuffleKind == 0) // normal
      return isVMerge(N, UnitSize, 8, 24);
    else
      return false;
  }
}

/// isVMRGHShuffleMask - Return true if this is a shuffle mask suitable for
/// a VMRGH* instruction with the specified unit size (1,2 or 4 bytes).
/// The ShuffleKind distinguishes between big-endian merges with two
/// different inputs (0), either-endian merges with two identical inputs (1),
/// and little-endian merges with two different inputs (2). For the latter,
/// the input operands are swapped (see PPCInstrAltivec.td).
bool PPC::isVMRGHShuffleMask(ShuffleVectorSDNode *N, unsigned UnitSize,
                             unsigned ShuffleKind, SelectionDAG &DAG) {
  if (DAG.getDataLayout().isLittleEndian()) {
    if (ShuffleKind == 1) // unary
      return isVMerge(N, UnitSize, 8, 8);
    else if (ShuffleKind == 2) // swapped
      return isVMerge(N, UnitSize, 8, 24);
    else
      return false;
  } else {
    if (ShuffleKind == 1) // unary
      return isVMerge(N, UnitSize, 0, 0);
    else if (ShuffleKind == 0) // normal
      return isVMerge(N, UnitSize, 0, 16);
    else
      return false;
  }
}

/**
 * Common function used to match vmrgew and vmrgow shuffles
 *
 * The indexOffset determines whether to look for even or odd words in
 * the shuffle mask. This is based on the endianness of the target
 * machine.
 *   - Little Endian:
 *     - Use offset of 0 to check for odd elements
 *     - Use offset of 4 to check for even elements
 *   - Big Endian:
 *     - Use offset of 0 to check for even elements
 *     - Use offset of 4 to check for odd elements
 * A detailed description of the vector element ordering for little endian and
 * big endian can be found at
 * http://www.ibm.com/developerworks/library/l-ibm-xl-c-cpp-compiler/index.html
 * Targeting your applications - what little endian and big endian IBM XL C/C++
 * compiler differences mean to you
 *
 * The mask to the shuffle vector instruction specifies the indices of the
 * elements from the two input vectors to place in the result. The elements
 * are numbered in array-access order, starting with the first vector. These
 * vectors are always of type v16i8, thus each vector will contain 16 elements
 * of size 8 bits. More info on the shuffle vector can be found in the
 * http://llvm.org/docs/LangRef.html#shufflevector-instruction
 * Language Reference.
 *
 * The RHSStartValue indicates whether the same input vectors are used (unary)
 * or two different input vectors are used, based on the following:
 *   - If the instruction uses the same vector for both inputs, the range of
 *     the indices will be 0 to 15. In this case, the RHSStart value passed
 *     should be 0.
 *   - If the instruction has two different vectors then the range of the
 *     indices will be 0 to 31. In this case, the RHSStart value passed should
 *     be 16 (indices 0-15 specify elements in the first vector while indices
 *     16 to 31 specify elements in the second vector).
 *
 * \param[in] N The shuffle vector SD Node to analyze
 * \param[in] IndexOffset Specifies whether to look for even or odd elements
 * \param[in] RHSStartValue Specifies the starting index for the righthand
 *            input vector to the shuffle_vector instruction
 * \return true iff this shuffle vector represents an even or odd word merge
 */
static bool isVMerge(ShuffleVectorSDNode *N, unsigned IndexOffset,
                     unsigned RHSStartValue) {
  if (N->getValueType(0) != MVT::v16i8)
    return false;

  for (unsigned i = 0; i < 2; ++i)
    for (unsigned j = 0; j < 4; ++j)
      if (!isConstantOrUndef(N->getMaskElt(i*4+j),
                             i*RHSStartValue+j+IndexOffset) ||
          !isConstantOrUndef(N->getMaskElt(i*4+j+8),
                             i*RHSStartValue+j+IndexOffset+8))
        return false;
  return true;
}

/**
 * Determine if the specified shuffle mask is suitable for the vmrgew or
 * vmrgow instructions.
 *
 * \param[in] N The shuffle vector SD Node to analyze
 * \param[in] CheckEven Check for an even merge (true) or an odd merge (false)
 * \param[in] ShuffleKind Identify the type of merge:
 *   - 0 = big-endian merge with two different inputs;
 *   - 1 = either-endian merge with two identical inputs;
 *   - 2 = little-endian merge with two different inputs (inputs are swapped
 *     for little-endian merges).
 * \param[in] DAG The current SelectionDAG
 * \return true iff this shuffle mask is suitable for vmrgew (when CheckEven
 *         is true) or vmrgow (when CheckEven is false)
 */
bool PPC::isVMrgEOShuffleMask(ShuffleVectorSDNode *N, bool CheckEven,
                              unsigned ShuffleKind, SelectionDAG &DAG) {
  if (DAG.getDataLayout().isLittleEndian()) {
    unsigned indexOffset = CheckEven ? 4 : 0;
    if (ShuffleKind == 1) // Unary
      return isVMerge(N, indexOffset, 0);
    else if (ShuffleKind == 2) // swapped
      return isVMerge(N, indexOffset, 16);
    else
      return false;
  } else {
    unsigned indexOffset = CheckEven ? 0 : 4;
    if (ShuffleKind == 1) // Unary
      return isVMerge(N, indexOffset, 0);
    else if (ShuffleKind == 0) // Normal
      return isVMerge(N, indexOffset, 16);
    else
      return false;
  }
  return false;
}

/// isVSLDOIShuffleMask - If this is a vsldoi shuffle mask, return the shift
/// amount, otherwise return -1.
/// The ShuffleKind distinguishes between big-endian operations with two
/// different inputs (0), either-endian operations with two identical inputs
/// (1), and little-endian operations with two different inputs (2). For the
/// latter, the input operands are swapped (see PPCInstrAltivec.td).
int PPC::isVSLDOIShuffleMask(SDNode *N, unsigned ShuffleKind,
                             SelectionDAG &DAG) {
  if (N->getValueType(0) != MVT::v16i8)
    return -1;

  ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(N);

  // Find the first non-undef value in the shuffle mask.
  unsigned i;
  for (i = 0; i != 16 && SVOp->getMaskElt(i) < 0; ++i)
    /*search*/;

  if (i == 16) return -1;  // all undef.

  // Otherwise, check to see if the rest of the elements are consecutively
  // numbered from this value.
  unsigned ShiftAmt = SVOp->getMaskElt(i);
  if (ShiftAmt < i) return -1;

  ShiftAmt -= i;
  bool isLE = DAG.getDataLayout().isLittleEndian();

  if ((ShuffleKind == 0 && !isLE) || (ShuffleKind == 2 && isLE)) {
    // Check the rest of the elements to see if they are consecutive.
    for (++i; i != 16; ++i)
      if (!isConstantOrUndef(SVOp->getMaskElt(i), ShiftAmt+i))
        return -1;
  } else if (ShuffleKind == 1) {
    // Check the rest of the elements to see if they are consecutive.
    for (++i; i != 16; ++i)
      if (!isConstantOrUndef(SVOp->getMaskElt(i), (ShiftAmt+i) & 15))
        return -1;
  } else
    return -1;

  if (isLE)
    ShiftAmt = 16 - ShiftAmt;

  return ShiftAmt;
}
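
// Example (editorial note, not part of the original source): on big-endian
// with two different inputs (ShuffleKind 0), the v16i8 mask <4,5,6,...,19>
// starts at element 4 and is consecutive, so the function returns a shift
// amount of 4, i.e. "vsldoi vD, vA, vB, 4".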

/// isSplatShuffleMask - Return true if the specified VECTOR_SHUFFLE operand
/// specifies a splat of a single element that is suitable for input to
/// one of the splat operations (VSPLTB/VSPLTH/VSPLTW/XXSPLTW/LXVDSX/etc.).
bool PPC::isSplatShuffleMask(ShuffleVectorSDNode *N, unsigned EltSize) {
  assert(N->getValueType(0) == MVT::v16i8 && isPowerOf2_32(EltSize) &&
         EltSize <= 8 && "Can only handle 1,2,4,8 byte element sizes");

  // The consecutive indices need to specify an element, not part of two
  // different elements. So abandon ship early if this isn't the case.
  if (N->getMaskElt(0) % EltSize != 0)
    return false;

  // This is a splat operation if each element of the permute is the same, and
  // if the value doesn't reference the second vector.
  unsigned ElementBase = N->getMaskElt(0);

  // FIXME: Handle UNDEF elements too!
  if (ElementBase >= 16)
    return false;

  // Check that the indices are consecutive, in the case of a multi-byte
  // element splatted with a v16i8 mask.
  for (unsigned i = 1; i != EltSize; ++i)
    if (N->getMaskElt(i) < 0 || N->getMaskElt(i) != (int)(i+ElementBase))
      return false;

  for (unsigned i = EltSize, e = 16; i != e; i += EltSize) {
    if (N->getMaskElt(i) < 0) continue;
    for (unsigned j = 0; j != EltSize; ++j)
      if (N->getMaskElt(i+j) != N->getMaskElt(j))
        return false;
  }
  return true;
}
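
// Example (editorial note, not part of the original source): with
// EltSize == 4, the v16i8 mask <8,9,10,11, 8,9,10,11, 8,9,10,11, 8,9,10,11>
// is accepted: every four-byte group repeats bytes 8-11, i.e. a splat of
// word element 2 of the first input, suitable for vspltw/xxspltw.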

/// Check that the mask is shuffling N byte elements. Within each N byte
/// element of the mask, the indices could be either in increasing or
/// decreasing order as long as they are consecutive.
/// \param[in] N the shuffle vector SD Node to analyze
/// \param[in] Width the element width in bytes, could be 2/4/8/16 (HalfWord/
/// Word/DoubleWord/QuadWord).
/// \param[in] StepLen the delta indices number among the N byte element, if
/// the mask is in increasing/decreasing order then it is 1/-1.
/// \return true iff the mask is shuffling N byte elements.
static bool isNByteElemShuffleMask(ShuffleVectorSDNode *N, unsigned Width,
                                   int StepLen) {
  assert((Width == 2 || Width == 4 || Width == 8 || Width == 16) &&
         "Unexpected element width.");
  assert((StepLen == 1 || StepLen == -1) && "Unexpected step length.");

  unsigned NumOfElem = 16 / Width;
  unsigned MaskVal[16]; //  Width is never greater than 16
  for (unsigned i = 0; i < NumOfElem; ++i) {
    MaskVal[0] = N->getMaskElt(i * Width);
    if ((StepLen == 1) && (MaskVal[0] % Width)) {
      return false;
    } else if ((StepLen == -1) && ((MaskVal[0] + 1) % Width)) {
      return false;
    }

    for (unsigned int j = 1; j < Width; ++j) {
      MaskVal[j] = N->getMaskElt(i * Width + j);
      if (MaskVal[j] != MaskVal[j-1] + StepLen) {
        return false;
      }
    }
  }

  return true;
}
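
// Example (editorial note, not part of the original source): with
// Width == 4 and StepLen == -1, the v16i8 mask
// <3,2,1,0, 7,6,5,4, 11,10,9,8, 15,14,13,12> qualifies: within each word the
// byte indices run consecutively in decreasing order, which is the pattern
// matched for the XXBRW byte-reverse below.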

bool PPC::isXXINSERTWMask(ShuffleVectorSDNode *N, unsigned &ShiftElts,
                          unsigned &InsertAtByte, bool &Swap, bool IsLE) {
  if (!isNByteElemShuffleMask(N, 4, 1))
    return false;

  // Now we look at mask elements 0,4,8,12.
  unsigned M0 = N->getMaskElt(0) / 4;
  unsigned M1 = N->getMaskElt(4) / 4;
  unsigned M2 = N->getMaskElt(8) / 4;
  unsigned M3 = N->getMaskElt(12) / 4;
  unsigned LittleEndianShifts[] = { 2, 1, 0, 3 };
  unsigned BigEndianShifts[] = { 3, 0, 1, 2 };

  // Below, let H and L be arbitrary elements of the shuffle mask
  // where H is in the range [4,7] and L is in the range [0,3].
  // H, 1, 2, 3 or L, 5, 6, 7
  if ((M0 > 3 && M1 == 1 && M2 == 2 && M3 == 3) ||
      (M0 < 4 && M1 == 5 && M2 == 6 && M3 == 7)) {
    ShiftElts = IsLE ? LittleEndianShifts[M0 & 0x3] : BigEndianShifts[M0 & 0x3];
    InsertAtByte = IsLE ? 12 : 0;
    Swap = M0 < 4;
    return true;
  }
  // 0, H, 2, 3 or 4, L, 6, 7
  if ((M1 > 3 && M0 == 0 && M2 == 2 && M3 == 3) ||
      (M1 < 4 && M0 == 4 && M2 == 6 && M3 == 7)) {
    ShiftElts = IsLE ? LittleEndianShifts[M1 & 0x3] : BigEndianShifts[M1 & 0x3];
    InsertAtByte = IsLE ? 8 : 4;
    Swap = M1 < 4;
    return true;
  }
  // 0, 1, H, 3 or 4, 5, L, 7
  if ((M2 > 3 && M0 == 0 && M1 == 1 && M3 == 3) ||
      (M2 < 4 && M0 == 4 && M1 == 5 && M3 == 7)) {
    ShiftElts = IsLE ? LittleEndianShifts[M2 & 0x3] : BigEndianShifts[M2 & 0x3];
    InsertAtByte = IsLE ? 4 : 8;
    Swap = M2 < 4;
    return true;
  }
  // 0, 1, 2, H or 4, 5, 6, L
  if ((M3 > 3 && M0 == 0 && M1 == 1 && M2 == 2) ||
      (M3 < 4 && M0 == 4 && M1 == 5 && M2 == 6)) {
    ShiftElts = IsLE ? LittleEndianShifts[M3 & 0x3] : BigEndianShifts[M3 & 0x3];
    InsertAtByte = IsLE ? 0 : 12;
    Swap = M3 < 4;
    return true;
  }

  // If both vector operands for the shuffle are the same vector, the mask
  // will contain only elements from the first one and the second one will be
  // undef.
  if (N->getOperand(1).isUndef()) {
    ShiftElts = 0;
    Swap = true;
    unsigned XXINSERTWSrcElem = IsLE ? 2 : 1;
    if (M0 == XXINSERTWSrcElem && M1 == 1 && M2 == 2 && M3 == 3) {
      InsertAtByte = IsLE ? 12 : 0;
      return true;
    }
    if (M0 == 0 && M1 == XXINSERTWSrcElem && M2 == 2 && M3 == 3) {
      InsertAtByte = IsLE ? 8 : 4;
      return true;
    }
    if (M0 == 0 && M1 == 1 && M2 == XXINSERTWSrcElem && M3 == 3) {
      InsertAtByte = IsLE ? 4 : 8;
      return true;
    }
    if (M0 == 0 && M1 == 1 && M2 == 2 && M3 == XXINSERTWSrcElem) {
      InsertAtByte = IsLE ? 0 : 12;
      return true;
    }
  }

  return false;
}
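
// Example (editorial note, not part of the original source): on
// little-endian, the word-level mask <0,1,5,3> (byte mask
// <0..7, 20,21,22,23, 12..15>) matches the "0, 1, H, 3" pattern with H = 5,
// giving Swap = false, ShiftElts = LittleEndianShifts[1] = 1 and
// InsertAtByte = 4.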

bool PPC::isXXSLDWIShuffleMask(ShuffleVectorSDNode *N, unsigned &ShiftElts,
                               bool &Swap, bool IsLE) {
  assert(N->getValueType(0) == MVT::v16i8 && "Shuffle vector expects v16i8");
  // Ensure the byte indices within each word are consecutive.
  if (!isNByteElemShuffleMask(N, 4, 1))
    return false;

  // Now we look at mask elements 0,4,8,12, which are the beginning of words.
  unsigned M0 = N->getMaskElt(0) / 4;
  unsigned M1 = N->getMaskElt(4) / 4;
  unsigned M2 = N->getMaskElt(8) / 4;
  unsigned M3 = N->getMaskElt(12) / 4;

  // If both vector operands for the shuffle are the same vector, the mask
  // will contain only elements from the first one and the second one will be
  // undef.
  if (N->getOperand(1).isUndef()) {
    assert(M0 < 4 && "Indexing into an undef vector?");
    if (M1 != (M0 + 1) % 4 || M2 != (M1 + 1) % 4 || M3 != (M2 + 1) % 4)
      return false;

    ShiftElts = IsLE ? (4 - M0) % 4 : M0;
    Swap = false;
    return true;
  }

  // Ensure each word index of the ShuffleVector Mask is consecutive.
  if (M1 != (M0 + 1) % 8 || M2 != (M1 + 1) % 8 || M3 != (M2 + 1) % 8)
    return false;

  if (IsLE) {
    if (M0 == 0 || M0 == 7 || M0 == 6 || M0 == 5) {
      // Input vectors don't need to be swapped if the leading element
      // of the result is one of the 3 left elements of the second vector
      // (or if there is no shift to be done at all).
      Swap = false;
      ShiftElts = (8 - M0) % 8;
    } else if (M0 == 4 || M0 == 3 || M0 == 2 || M0 == 1) {
      // Input vectors need to be swapped if the leading element
      // of the result is one of the 3 left elements of the first vector
      // (or if we're shifting by 4 - thereby simply swapping the vectors).
      Swap = true;
      ShiftElts = (4 - M0) % 4;
    }

    return true;
  } else {                                          // BE
    if (M0 == 0 || M0 == 1 || M0 == 2 || M0 == 3) {
      // Input vectors don't need to be swapped if the leading element
      // of the result is one of the 4 elements of the first vector.
      Swap = false;
      ShiftElts = M0;
    } else if (M0 == 4 || M0 == 5 || M0 == 6 || M0 == 7) {
      // Input vectors need to be swapped if the leading element
      // of the result is one of the 4 elements of the right vector.
      Swap = true;
      ShiftElts = M0 - 4;
    }

    return true;
  }
}
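
// Example (editorial note, not part of the original source): on
// little-endian with the second operand undef, the word mask <1,2,3,0> has
// consecutive word indices starting at M0 = 1, so the function sets
// Swap = false and ShiftElts = (4 - 1) % 4 = 3 for an xxsldwi by three words.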

static bool isXXBRShuffleMaskHelper(ShuffleVectorSDNode *N, int Width) {
  assert(N->getValueType(0) == MVT::v16i8 && "Shuffle vector expects v16i8");

  if (!isNByteElemShuffleMask(N, Width, -1))
    return false;

  for (int i = 0; i < 16; i += Width)
    if (N->getMaskElt(i) != i + Width - 1)
      return false;

  return true;
}

bool PPC::isXXBRHShuffleMask(ShuffleVectorSDNode *N) {
  return isXXBRShuffleMaskHelper(N, 2);
}

bool PPC::isXXBRWShuffleMask(ShuffleVectorSDNode *N) {
  return isXXBRShuffleMaskHelper(N, 4);
}

bool PPC::isXXBRDShuffleMask(ShuffleVectorSDNode *N) {
  return isXXBRShuffleMaskHelper(N, 8);
}

bool PPC::isXXBRQShuffleMask(ShuffleVectorSDNode *N) {
  return isXXBRShuffleMaskHelper(N, 16);
}

/// Can node \p N be lowered to an XXPERMDI instruction? If so, set \p Swap
/// if the inputs to the instruction should be swapped and set \p DM to the
/// value for the immediate.
/// Specifically, set \p Swap to true only if \p N can be lowered to XXPERMDI
/// AND element 0 of the result comes from the first input (LE) or second input
/// (BE). Set \p DM to the calculated result (0-3) only if \p N can be lowered.
/// \return true iff the given mask of shuffle node \p N is a XXPERMDI shuffle
/// mask.
bool PPC::isXXPERMDIShuffleMask(ShuffleVectorSDNode *N, unsigned &DM,
                                bool &Swap, bool IsLE) {
  assert(N->getValueType(0) == MVT::v16i8 && "Shuffle vector expects v16i8");

  // Ensure each byte index of the double word is consecutive.
  if (!isNByteElemShuffleMask(N, 8, 1))
    return false;

  unsigned M0 = N->getMaskElt(0) / 8;
  unsigned M1 = N->getMaskElt(8) / 8;
  assert(((M0 | M1) < 4) && "A mask element out of bounds?");

  // If both vector operands for the shuffle are the same vector, the mask
  // will contain only elements from the first one and the second one will be
  // undef.
  if (N->getOperand(1).isUndef()) {
    if ((M0 | M1) < 2) {
      DM = IsLE ? (((~M1) & 1) << 1) + ((~M0) & 1) : (M0 << 1) + (M1 & 1);
      Swap = false;
      return true;
    } else
      return false;
  }

  if (IsLE) {
    if (M0 > 1 && M1 < 2) {
      Swap = false;
    } else if (M0 < 2 && M1 > 1) {
      M0 = (M0 + 2) % 4;
      M1 = (M1 + 2) % 4;
      Swap = true;
    } else
      return false;

    // Note: if control flow comes here that means Swap is already set above
    DM = (((~M1) & 1) << 1) + ((~M0) & 1);
    return true;
  } else {                                          // BE
    if (M0 < 2 && M1 > 1) {
      Swap = false;
    } else if (M0 > 1 && M1 < 2) {
      M0 = (M0 + 2) % 4;
      M1 = (M1 + 2) % 4;
      Swap = true;
    } else
      return false;

    // Note: if control flow comes here that means Swap is already set above
    DM = (M0 << 1) + (M1 & 1);
    return true;
  }
}
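
// Worked example (editorial note, not part of the original source): on
// big-endian, the byte mask <0..7, 24..31> gives doubleword indices M0 = 0
// and M1 = 3, so Swap = false and DM = (0 << 1) + (3 & 1) = 1, selecting
// doubleword 0 of the first input and doubleword 1 of the second.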

/// getSplatIdxForPPCMnemonics - Return the splat index as a value that is
/// appropriate for PPC mnemonics (which have a big endian bias - namely
/// elements are counted from the left of the vector register).
unsigned PPC::getSplatIdxForPPCMnemonics(SDNode *N, unsigned EltSize,
                                         SelectionDAG &DAG) {
  ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(N);
  assert(isSplatShuffleMask(SVOp, EltSize));
  if (DAG.getDataLayout().isLittleEndian())
    return (16 / EltSize) - 1 - (SVOp->getMaskElt(0) / EltSize);
  else
    return SVOp->getMaskElt(0) / EltSize;
}
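
// Example (editorial note, not part of the original source): for
// EltSize == 4 and a mask splatting bytes 8-11 (array-order word 2),
// big-endian returns 8 / 4 = 2, while little-endian returns
// (16 / 4) - 1 - 2 = 1, the same physical word counted from the left of the
// register.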
2303
2304/// get_VSPLTI_elt - If this is a build_vector of constants which can be formed
2305/// by using a vspltis[bhw] instruction of the specified element size, return
2306/// the constant being splatted. The ByteSize field indicates the number of
2307/// bytes of each element [124] -> [bhw].
2309 SDValue OpVal(nullptr, 0);
2310
2311 // If ByteSize of the splat is bigger than the element size of the
2312 // build_vector, then we have a case where we are checking for a splat where
2313 // multiple elements of the buildvector are folded together into a single
2314 // logical element of the splat (e.g. "vsplish 1" to splat {0,1}*8).
2315 unsigned EltSize = 16/N->getNumOperands();
2316 if (EltSize < ByteSize) {
2317 unsigned Multiple = ByteSize/EltSize; // Number of BV entries per spltval.
2318 SDValue UniquedVals[4];
2319 assert(Multiple > 1 && Multiple <= 4 && "How can this happen?");
2320
2321 // See if all of the elements in the buildvector agree across.
2322 for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) {
2323 if (N->getOperand(i).isUndef()) continue;
2324 // If the element isn't a constant, bail fully out.
2325 if (!isa<ConstantSDNode>(N->getOperand(i))) return SDValue();
2326
2327 if (!UniquedVals[i&(Multiple-1)].getNode())
2328 UniquedVals[i&(Multiple-1)] = N->getOperand(i);
2329 else if (UniquedVals[i&(Multiple-1)] != N->getOperand(i))
2330 return SDValue(); // no match.
2331 }
2332
2333 // Okay, if we reached this point, UniquedVals[0..Multiple-1] contains
2334 // either constant or undef values that are identical for each chunk. See
2335 // if these chunks can form into a larger vspltis*.
2336
2337 // Check to see if all of the leading entries are either 0 or -1. If
2338 // neither, then this won't fit into the immediate field.
2339 bool LeadingZero = true;
2340 bool LeadingOnes = true;
2341 for (unsigned i = 0; i != Multiple-1; ++i) {
2342 if (!UniquedVals[i].getNode()) continue; // Must have been undefs.
2343
2344 LeadingZero &= isNullConstant(UniquedVals[i]);
2345 LeadingOnes &= isAllOnesConstant(UniquedVals[i]);
2346 }
2347 // Finally, check the least significant entry.
2348 if (LeadingZero) {
2349 if (!UniquedVals[Multiple-1].getNode())
2350 return DAG.getTargetConstant(0, SDLoc(N), MVT::i32); // 0,0,0,undef
2351 int Val = cast<ConstantSDNode>(UniquedVals[Multiple-1])->getZExtValue();
2352 if (Val < 16) // 0,0,0,4 -> vspltisw(4)
2353 return DAG.getTargetConstant(Val, SDLoc(N), MVT::i32);
2354 }
2355 if (LeadingOnes) {
2356 if (!UniquedVals[Multiple-1].getNode())
2357 return DAG.getTargetConstant(~0U, SDLoc(N), MVT::i32); // -1,-1,-1,undef
2358 int Val =cast<ConstantSDNode>(UniquedVals[Multiple-1])->getSExtValue();
2359 if (Val >= -16) // -1,-1,-1,-2 -> vspltisw(-2)
2360 return DAG.getTargetConstant(Val, SDLoc(N), MVT::i32);
2361 }
2362
2363 return SDValue();
2364 }
2365
2366 // Check to see if this buildvec has a single non-undef value in its elements.
2367 for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) {
2368 if (N->getOperand(i).isUndef()) continue;
2369 if (!OpVal.getNode())
2370 OpVal = N->getOperand(i);
2371 else if (OpVal != N->getOperand(i))
2372 return SDValue();
2373 }
2374
2375 if (!OpVal.getNode()) return SDValue(); // All UNDEF: use implicit def.
2376
2377 unsigned ValSizeInBytes = EltSize;
2378 uint64_t Value = 0;
2379 if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(OpVal)) {
2380 Value = CN->getZExtValue();
2381 } else if (ConstantFPSDNode *CN = dyn_cast<ConstantFPSDNode>(OpVal)) {
2382 assert(CN->getValueType(0) == MVT::f32 && "Only one legal FP vector type!");
2383 Value = FloatToBits(CN->getValueAPF().convertToFloat());
2384 }
2385
2386 // If the splat value is larger than the element value, then we can never do
2387 // this splat. The only case where the replicated bits would fit in our
2388 // immediate field is zero, and we prefer to use vxor for that.
2389 if (ValSizeInBytes < ByteSize) return SDValue();
2390
2391 // If the element value is larger than the splat value, check if it consists
2392 // of a repeated bit pattern of size ByteSize.
2393 if (!APInt(ValSizeInBytes * 8, Value).isSplat(ByteSize * 8))
2394 return SDValue();
2395
2396 // Properly sign extend the value.
2397 int MaskVal = SignExtend32(Value, ByteSize * 8);
2398
2399 // If this is zero, don't match; zero matches ISD::isBuildVectorAllZeros.
2400 if (MaskVal == 0) return SDValue();
2401
2402 // Finally, if this value fits in a 5 bit sext field, return it
2403 if (SignExtend32<5>(MaskVal) == MaskVal)
2404 return DAG.getTargetConstant(MaskVal, SDLoc(N), MVT::i32);
2405 return SDValue();
2406}
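For readers without the DAG context: the test above succeeds exactly when the
splatted element is its low ByteSize bytes repeated end to end and the
sign-extended pattern fits the 5-bit vspltis immediate range [-16, 15]. A
minimal standalone sketch of that predicate (hypothetical helper, not part of
this file):

#include <cstdint>

static bool fitsVSPLTISImm(uint64_t Value, unsigned ValBytes,
                           unsigned ByteSize) { // ByteSize is 1, 2, or 4
  unsigned Bits = ByteSize * 8;
  uint64_t Mask = (1ULL << Bits) - 1;
  uint64_t Pattern = Value & Mask;
  // The element must be the low ByteSize bytes repeated end to end.
  for (unsigned i = Bits; i < ValBytes * 8; i += Bits)
    if (((Value >> i) & Mask) != Pattern)
      return false;
  // Sign-extend the pattern and check the 5-bit signed immediate range.
  int64_t SExt = (int64_t)(Pattern << (64 - Bits)) >> (64 - Bits);
  return SExt >= -16 && SExt <= 15;
}
// e.g. fitsVSPLTISImm(0xFFFEFFFEULL, 4, 2) holds: each halfword is -2,
// so the element is reachable with vspltish -2.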
2407
2408//===----------------------------------------------------------------------===//
2409// Addressing Mode Selection
2410//===----------------------------------------------------------------------===//
2411
2412/// isIntS16Immediate - This method tests to see if the node is either a 32-bit
2413/// or 64-bit immediate, and if the value can be accurately represented as a
2414/// sign extension from a 16-bit value. If so, this returns true and the
2415/// immediate.
2416bool llvm::isIntS16Immediate(SDNode *N, int16_t &Imm) {
2417 if (!isa<ConstantSDNode>(N))
2418 return false;
2419
2420 Imm = (int16_t)cast<ConstantSDNode>(N)->getZExtValue();
2421 if (N->getValueType(0) == MVT::i32)
2422 return Imm == (int32_t)cast<ConstantSDNode>(N)->getZExtValue();
2423 else
2424 return Imm == (int64_t)cast<ConstantSDNode>(N)->getZExtValue();
2425}
2426bool llvm::isIntS16Immediate(SDValue Op, int16_t &Imm) {
2427 return isIntS16Immediate(Op.getNode(), Imm);
2428}
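Both overloads reduce to a truncate-and-compare round trip; a freestanding
equivalent (illustrative name, not an LLVM API) is:

#include <cstdint>

static bool isS16(int64_t V) { return V == (int64_t)(int16_t)V; }
// isS16(32767) and isS16(-32768) hold; isS16(32768) does not.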
2429
2430
2431/// SelectAddressEVXRegReg - Given the specified address, check to see if it can
2432/// be represented as an indexed [r+r] operation.
2433bool PPCTargetLowering::SelectAddressEVXRegReg(SDValue N, SDValue &Base,
2434 SDValue &Index,
2435 SelectionDAG &DAG) const {
2436 for (SDNode::use_iterator UI = N->use_begin(), E = N->use_end();
2437 UI != E; ++UI) {
2438 if (MemSDNode *Memop = dyn_cast<MemSDNode>(*UI)) {
2439 if (Memop->getMemoryVT() == MVT::f64) {
2440 Base = N.getOperand(0);
2441 Index = N.getOperand(1);
2442 return true;
2443 }
2444 }
2445 }
2446 return false;
2447}
2448
2449/// isIntS34Immediate - This method tests whether the given node's value can be
2450/// accurately represented as a sign extension from a 34-bit value. If so,
2451/// this returns true and the immediate.
2452bool llvm::isIntS34Immediate(SDNode *N, int64_t &Imm) {
2453 if (!isa<ConstantSDNode>(N))
2454 return false;
2455
2456 Imm = (int64_t)cast<ConstantSDNode>(N)->getZExtValue();
2457 return isInt<34>(Imm);
2458}
2459bool llvm::isIntS34Immediate(SDValue Op, int64_t &Imm) {
2460 return isIntS34Immediate(Op.getNode(), Imm);
2461}
2462
2463/// SelectAddressRegReg - Given the specified address, check to see if it
2464/// can be represented as an indexed [r+r] operation. Returns false if it
2465/// can be more efficiently represented as [r+imm]. If \p EncodingAlignment is
2466/// non-zero and N can be represented by a base register plus a signed 16-bit
2467/// displacement, make a more precise judgement by checking (displacement % \p
2468/// EncodingAlignment).
2469bool PPCTargetLowering::SelectAddressRegReg(
2470 SDValue N, SDValue &Base, SDValue &Index, SelectionDAG &DAG,
2471 MaybeAlign EncodingAlignment) const {
2472 // If we have a PC Relative target flag don't select as [reg+reg]. It will be
2473 // a [pc+imm].
2474 if (SelectAddressPCRel(N, Base))
2475 return false;
2476
2477 int16_t Imm = 0;
2478 if (N.getOpcode() == ISD::ADD) {
2479 // Is this an SPE f64 load/store, which can't handle a 16-bit offset?
2480 // SPE load/store can only handle 8-bit offsets.
2481 if (hasSPE() && SelectAddressEVXRegReg(N, Base, Index, DAG))
2482 return true;
2483 if (isIntS16Immediate(N.getOperand(1), Imm) &&
2484 (!EncodingAlignment || isAligned(*EncodingAlignment, Imm)))
2485 return false; // r+i
2486 if (N.getOperand(1).getOpcode() == PPCISD::Lo)
2487 return false; // r+i
2488
2489 Base = N.getOperand(0);
2490 Index = N.getOperand(1);
2491 return true;
2492 } else if (N.getOpcode() == ISD::OR) {
2493 if (isIntS16Immediate(N.getOperand(1), Imm) &&
2494 (!EncodingAlignment || isAligned(*EncodingAlignment, Imm)))
2495 return false; // fold to r+i if we can.
2496
2497 // If this is an or of disjoint bitfields, we can codegen this as an add
2498 // (for better address arithmetic) if the LHS and RHS of the OR are provably
2499 // disjoint.
2500 KnownBits LHSKnown = DAG.computeKnownBits(N.getOperand(0));
2501
2502 if (LHSKnown.Zero.getBoolValue()) {
2503 KnownBits RHSKnown = DAG.computeKnownBits(N.getOperand(1));
2504 // If all of the bits are known zero on the LHS or RHS, the add won't
2505 // carry.
2506 if (~(LHSKnown.Zero | RHSKnown.Zero) == 0) {
2507 Base = N.getOperand(0);
2508 Index = N.getOperand(1);
2509 return true;
2510 }
2511 }
2512 }
2513
2514 return false;
2515}
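The ISD::OR case above leans on a small identity: when the set bits of the two
operands are provably disjoint, a | b == a + b, since no bit position can
carry. A runnable sketch with illustrative values:

#include <cassert>
#include <cstdint>

void orAsAddDemo() {
  uint64_t Base = 0x1000; // low 12 bits known zero
  uint64_t Off = 0x7;     // fits entirely within those low bits
  assert((Base | Off) == (Base + Off)); // disjoint bits, so no carry
}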
2516
2517// If we happen to be doing an i64 load or store into a stack slot that has
2518// less than a 4-byte alignment, then the frame-index elimination may need to
2519// use an indexed load or store instruction (because the offset may not be a
2520// multiple of 4). The extra register needed to hold the offset comes from the
2521// register scavenger, and it is possible that the scavenger will need to use
2522// an emergency spill slot. As a result, we need to make sure that a spill slot
2523// is allocated when doing an i64 load/store into a less-than-4-byte-aligned
2524// stack slot.
2525static void fixupFuncForFI(SelectionDAG &DAG, int FrameIdx, EVT VT) {
2526 // FIXME: This does not handle the LWA case.
2527 if (VT != MVT::i64)
2528 return;
2529
2530 // NOTE: We'll exclude negative FIs here, which come from argument
2531 // lowering, because there are no known test cases triggering this problem
2532 // using packed structures (or similar). We can remove this exclusion if
2533 // we find such a test case. The reason why this is so test-case driven is
2534 // because this entire 'fixup' is only to prevent crashes (from the
2535 // register scavenger) on not-really-valid inputs. For example, if we have:
2536 // %a = alloca i1
2537 // %b = bitcast i1* %a to i64*
2538 // store i64 %v, i64* %b
2539 // then the store should really be marked as 'align 1', but is not. If it
2540 // were marked as 'align 1' then the indexed form would have been
2541 // instruction-selected initially, and the problem this 'fixup' is preventing
2542 // won't happen regardless.
2543 if (FrameIdx < 0)
2544 return;
2545
2546 MachineFunction &MF = DAG.getMachineFunction();
2547 MachineFrameInfo &MFI = MF.getFrameInfo();
2548
2549 if (MFI.getObjectAlign(FrameIdx) >= Align(4))
2550 return;
2551
2552 PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>();
2553 FuncInfo->setHasNonRISpills();
2554}
2555
2556/// Returns true if the address N can be represented by a base register plus
2557/// a signed 16-bit displacement [r+imm], and if it is not better
2558/// represented as reg+reg. If \p EncodingAlignment is non-zero, only accept
2559/// displacements that are multiples of that value.
2560bool PPCTargetLowering::SelectAddressRegImm(
2561 SDValue N, SDValue &Disp, SDValue &Base, SelectionDAG &DAG,
2562 MaybeAlign EncodingAlignment) const {
2563 // FIXME dl should come from parent load or store, not from address
2564 SDLoc dl(N);
2565
2566 // If we have a PC Relative target flag don't select as [reg+imm]. It will be
2567 // a [pc+imm].
2568 if (SelectAddressPCRel(N, Base))
2569 return false;
2570
2571 // If this can be more profitably realized as r+r, fail.
2572 if (SelectAddressRegReg(N, Disp, Base, DAG, EncodingAlignment))
2573 return false;
2574
2575 if (N.getOpcode() == ISD::ADD) {
2576 int16_t imm = 0;
2577 if (isIntS16Immediate(N.getOperand(1), imm) &&
2578 (!EncodingAlignment || isAligned(*EncodingAlignment, imm))) {
2579 Disp = DAG.getTargetConstant(imm, dl, N.getValueType());
2580 if (FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(N.getOperand(0))) {
2581 Base = DAG.getTargetFrameIndex(FI->getIndex(), N.getValueType());
2582 fixupFuncForFI(DAG, FI->getIndex(), N.getValueType());
2583 } else {
2584 Base = N.getOperand(0);
2585 }
2586 return true; // [r+i]
2587 } else if (N.getOperand(1).getOpcode() == PPCISD::Lo) {
2588 // Match LOAD (ADD (X, Lo(G))).
2589 assert(!cast<ConstantSDNode>(N.getOperand(1).getOperand(1))->getZExtValue()
2590 && "Cannot handle constant offsets yet!");
2591 Disp = N.getOperand(1).getOperand(0); // The global address.
2592 assert(Disp.getOpcode() == ISD::TargetGlobalAddress ||
2593 Disp.getOpcode() == ISD::TargetGlobalTLSAddress ||
2594 Disp.getOpcode() == ISD::TargetConstantPool ||
2595 Disp.getOpcode() == ISD::TargetJumpTable);
2596 Base = N.getOperand(0);
2597 return true; // [&g+r]
2598 }
2599 } else if (N.getOpcode() == ISD::OR) {
2600 int16_t imm = 0;
2601 if (isIntS16Immediate(N.getOperand(1), imm) &&
2602 (!EncodingAlignment || isAligned(*EncodingAlignment, imm))) {
2603 // If this is an or of disjoint bitfields, we can codegen this as an add
2604 // (for better address arithmetic) if the LHS and RHS of the OR are
2605 // provably disjoint.
2606 KnownBits LHSKnown = DAG.computeKnownBits(N.getOperand(0));
2607
2608 if ((LHSKnown.Zero.getZExtValue()|~(uint64_t)imm) == ~0ULL) {
2609 // If all of the bits are known zero on the LHS or RHS, the add won't
2610 // carry.
2611 if (FrameIndexSDNode *FI =
2612 dyn_cast<FrameIndexSDNode>(N.getOperand(0))) {
2613 Base = DAG.getTargetFrameIndex(FI->getIndex(), N.getValueType());
2614 fixupFuncForFI(DAG, FI->getIndex(), N.getValueType());
2615 } else {
2616 Base = N.getOperand(0);
2617 }
2618 Disp = DAG.getTargetConstant(imm, dl, N.getValueType());
2619 return true;
2620 }
2621 }
2622 } else if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(N)) {
2623 // Loading from a constant address.
2624
2625 // If this address fits entirely in a 16-bit sext immediate field, codegen
2626 // this as "d, 0"
2627 int16_t Imm;
2628 if (isIntS16Immediate(CN, Imm) &&
2629 (!EncodingAlignment || isAligned(*EncodingAlignment, Imm))) {
2630 Disp = DAG.getTargetConstant(Imm, dl, CN->getValueType(0));
2631 Base = DAG.getRegister(Subtarget.isPPC64() ? PPC::ZERO8 : PPC::ZERO,
2632 CN->getValueType(0));
2633 return true;
2634 }
2635
2636 // Handle 32-bit sext immediates with LIS + addr mode.
2637 if ((CN->getValueType(0) == MVT::i32 ||
2638 (int64_t)CN->getZExtValue() == (int)CN->getZExtValue()) &&
2639 (!EncodingAlignment ||
2640 isAligned(*EncodingAlignment, CN->getZExtValue()))) {
2641 int Addr = (int)CN->getZExtValue();
2642
2643 // Otherwise, break this down into an LIS + disp.
2644 Disp = DAG.getTargetConstant((short)Addr, dl, MVT::i32);
2645
2646 Base = DAG.getTargetConstant((Addr - (signed short)Addr) >> 16, dl,
2647 MVT::i32);
2648 unsigned Opc = CN->getValueType(0) == MVT::i32 ? PPC::LIS : PPC::LIS8;
2649 Base = SDValue(DAG.getMachineNode(Opc, dl, CN->getValueType(0), Base), 0);
2650 return true;
2651 }
2652 }
2653
2654 Disp = DAG.getTargetConstant(0, dl, getPointerTy(DAG.getDataLayout()));
2655 if (FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(N)) {
2656 Base = DAG.getTargetFrameIndex(FI->getIndex(), N.getValueType());
2657 fixupFuncForFI(DAG, FI->getIndex(), N.getValueType());
2658 } else
2659 Base = N;
2660 return true; // [r+0]
2661}
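The LIS + displacement split in the constant-address case must compensate for
the displacement being signed: when the low half has its top bit set,
subtracting the sign-extended low half rounds the high half up by one. A
runnable check with an illustrative address:

#include <cassert>
#include <cstdint>

void lisDispDemo() {
  uint32_t Addr = 0x12348000;             // illustrative constant address
  int16_t Disp = (int16_t)Addr;           // 0x8000 sign-extends to -32768
  uint32_t HiBase = Addr - (int32_t)Disp; // 0x12350000, what LIS materializes
  assert((HiBase & 0xFFFF) == 0);         // LIS leaves the low half zero
  assert(HiBase + (int32_t)Disp == Addr); // base + displacement reaches Addr
}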
2662
2663/// Similar to the 16-bit case but for instructions that take a 34-bit
2664/// displacement field (prefixed loads/stores).
2665bool PPCTargetLowering::SelectAddressRegImm34(SDValue N, SDValue &Disp,
2666 SDValue &Base,
2667 SelectionDAG &DAG) const {
2668 // Only on 64-bit targets.
2669 if (N.getValueType() != MVT::i64)
2670 return false;
2671
2672 SDLoc dl(N);
2673 int64_t Imm = 0;
2674
2675 if (N.getOpcode() == ISD::ADD) {
2676 if (!isIntS34Immediate(N.getOperand(1), Imm))
2677 return false;
2678 Disp = DAG.getTargetConstant(Imm, dl, N.getValueType());
2679 if (FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(N.getOperand(0)))
2680 Base = DAG.getTargetFrameIndex(FI->getIndex(), N.getValueType());
2681 else
2682 Base = N.getOperand(0);
2683 return true;
2684 }
2685
2686 if (N.getOpcode() == ISD::OR) {
2687 if (!isIntS34Immediate(N.getOperand(1), Imm))
2688 return false;
2689 // If this is an or of disjoint bitfields, we can codegen this as an add
2690 // (for better address arithmetic) if the LHS and RHS of the OR are
2691 // provably disjoint.
2692 KnownBits LHSKnown = DAG.computeKnownBits(N.getOperand(0));
2693 if ((LHSKnown.Zero.getZExtValue() | ~(uint64_t)Imm) != ~0ULL)
2694 return false;
2695 if (FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(N.getOperand(0)))
2696 Base = DAG.getTargetFrameIndex(FI->getIndex(), N.getValueType());
2697 else
2698 Base = N.getOperand(0);
2699 Disp = DAG.getTargetConstant(Imm, dl, N.getValueType());
2700 return true;
2701 }
2702
2703 if (isIntS34Immediate(N, Imm)) { // If the address is a 34-bit const.
2704 Disp = DAG.getTargetConstant(Imm, dl, N.getValueType());
2705 Base = DAG.getRegister(PPC::ZERO8, N.getValueType());
2706 return true;
2707 }
2708
2709 return false;
2710}
2711
2712/// SelectAddressRegRegOnly - Given the specified address, force it to be
2713/// represented as an indexed [r+r] operation.
2714bool PPCTargetLowering::SelectAddressRegRegOnly(SDValue N, SDValue &Base,
2715 SDValue &Index,
2716 SelectionDAG &DAG) const {
2717 // Check to see if we can easily represent this as an [r+r] address. This
2718 // will fail if it thinks that the address is more profitably represented as
2719 // reg+imm, e.g. where imm = 0.
2720 if (SelectAddressRegReg(N, Base, Index, DAG))
2721 return true;
2722
2723 // If the address is the result of an add, we will utilize the fact that the
2724 // address calculation includes an implicit add. However, we can reduce
2725 // register pressure if we do not materialize a constant just for use as the
2726 // index register. We only fold the add into the [r+r] form if it is not an
2727 // add of a value and a 16-bit signed constant where both have a single use.
2728 int16_t imm = 0;
2729 if (N.getOpcode() == ISD::ADD &&
2730 (!isIntS16Immediate(N.getOperand(1), imm) ||
2731 !N.getOperand(1).hasOneUse() || !N.getOperand(0).hasOneUse())) {
2732 Base = N.getOperand(0);
2733 Index = N.getOperand(1);
2734 return true;
2735 }
2736
2737 // Otherwise, do it the hard way, using R0 as the base register.
2738 Base = DAG.getRegister(Subtarget.isPPC64() ? PPC::ZERO8 : PPC::ZERO,
2739 N.getValueType());
2740 Index = N;
2741 return true;
2742}
2743
2744template <typename Ty> static bool isValidPCRelNode(SDValue N) {
2745 Ty *PCRelCand = dyn_cast<Ty>(N);
2746 return PCRelCand && (PCRelCand->getTargetFlags() & PPCII::MO_PCREL_FLAG);
2747}
2748
2749/// Returns true if this address is a PC Relative address.
2750/// PC Relative addresses are marked with the flag PPCII::MO_PCREL_FLAG
2751/// or if the node opcode is PPCISD::MAT_PCREL_ADDR.
2752bool PPCTargetLowering::SelectAddressPCRel(SDValue N, SDValue &Base) const {
2753 // This is a materialize PC Relative node. Always select this as PC Relative.
2754 Base = N;
2755 if (N.getOpcode() == PPCISD::MAT_PCREL_ADDR)
2756 return true;
2757 if (isValidPCRelNode<ConstantPoolSDNode>(N) ||
2758 isValidPCRelNode<GlobalAddressSDNode>(N) ||
2759 isValidPCRelNode<JumpTableSDNode>(N) ||
2760 isValidPCRelNode<BlockAddressSDNode>(N))
2761 return true;
2762 return false;
2763}
2764
2765/// Returns true if we should use a direct load into vector instruction
2766/// (such as lxsd or lfd), instead of a load into gpr + direct move sequence.
2767static bool usePartialVectorLoads(SDNode *N, const PPCSubtarget& ST) {
2768
2769 // If there are any uses other than scalar to vector, then we should
2770 // keep it as a scalar load -> direct move pattern to prevent multiple
2771 // loads.
2772 LoadSDNode *LD = dyn_cast<LoadSDNode>(N);
2773 if (!LD)
2774 return false;
2775
2776 EVT MemVT = LD->getMemoryVT();
2777 if (!MemVT.isSimple())
2778 return false;
2779 switch(MemVT.getSimpleVT().SimpleTy) {
2780 case MVT::i64:
2781 break;
2782 case MVT::i32:
2783 if (!ST.hasP8Vector())
2784 return false;
2785 break;
2786 case MVT::i16:
2787 case MVT::i8:
2788 if (!ST.hasP9Vector())
2789 return false;
2790 break;
2791 default:
2792 return false;
2793 }
2794
2795 SDValue LoadedVal(N, 0);
2796 if (!LoadedVal.hasOneUse())
2797 return false;
2798
2799 for (SDNode::use_iterator UI = LD->use_begin(), UE = LD->use_end();
2800 UI != UE; ++UI)
2801 if (UI.getUse().get().getResNo() == 0 &&
2802 UI->getOpcode() != ISD::SCALAR_TO_VECTOR &&
2803 UI->getOpcode() != PPCISD::SCALAR_TO_VECTOR_PERMUTED)
2804 return false;
2805
2806 return true;
2807}
2808
2809/// getPreIndexedAddressParts - returns true, and sets the base pointer, the
2810/// offset pointer, and the addressing mode by reference, if the node's address
2811/// can be legally represented as a pre-indexed load/store address.
2812bool PPCTargetLowering::getPreIndexedAddressParts(SDValue N, SDValue &Base,
2813 SDValue &Offset,
2814 ISD::MemIndexedMode &AM,
2815 SelectionDAG &DAG) const {
2816 if (DisablePPCPreinc) return false;
2817
2818 bool isLoad = true;
2819 SDValue Ptr;
2820 EVT VT;
2821 unsigned Alignment;
2822 if (LoadSDNode *LD = dyn_cast<LoadSDNode>(N)) {
2823 Ptr = LD->getBasePtr();
2824 VT = LD->getMemoryVT();
2825 Alignment = LD->getAlignment();
2826 } else if (StoreSDNode *ST = dyn_cast<StoreSDNode>(N)) {
2827 Ptr = ST->getBasePtr();
2828 VT = ST->getMemoryVT();
2829 Alignment = ST->getAlignment();
2830 isLoad = false;
2831 } else
2832 return false;
2833
2834 // Do not generate pre-inc forms for specific loads that feed scalar_to_vector
2835 // instructions because we can fold these into a more efficient instruction
2836 // instead (such as LXSD).
2837 if (isLoad && usePartialVectorLoads(N, Subtarget)) {
2838 return false;
2839 }
2840
2841 // PowerPC doesn't have preinc load/store instructions for vectors
2842 if (VT.isVector())
2843 return false;
2844
2845 if (SelectAddressRegReg(Ptr, Base, Offset, DAG)) {
2846 // Common code will reject creating a pre-inc form if the base pointer
2847 // is a frame index, or if N is a store and the base pointer is either
2848 // the same as or a predecessor of the value being stored. Check for
2849 // those situations here, and try with swapped Base/Offset instead.
2850 bool Swap = false;
2851
2852 if (isa<FrameIndexSDNode>(Base) || isa<RegisterSDNode>(Base))
2853 Swap = true;
2854 else if (!isLoad) {
2855 SDValue Val = cast<StoreSDNode>(N)->getValue();
2856 if (Val == Base || Base.getNode()->isPredecessorOf(Val.getNode()))
2857 Swap = true;
2858 }
2859
2860 if (Swap)
2861 std::swap(Base, Offset);
2862
2863 AM = ISD::PRE_INC;
2864 return true;
2865 }
2866
2867 // LDU/STU can only handle immediates that are a multiple of 4.
2868 if (VT != MVT::i64) {
2869 if (!SelectAddressRegImm(Ptr, Offset, Base, DAG, None))
2870 return false;
2871 } else {
2872 // LDU/STU need an address with at least 4-byte alignment.
2873 if (Alignment < 4)
2874 return false;
2875
2876 if (!SelectAddressRegImm(Ptr, Offset, Base, DAG, Align(4)))
2877 return false;
2878 }
2879
2880 if (LoadSDNode *LD = dyn_cast<LoadSDNode>(N)) {
2881 // PPC64 doesn't have lwau, but it does have lwaux. Reject preinc load of
2882 // sext i32 to i64 when addr mode is r+i.
2883 if (LD->getValueType(0) == MVT::i64 && LD->getMemoryVT() == MVT::i32 &&
2884 LD->getExtensionType() == ISD::SEXTLOAD &&
2885 isa<ConstantSDNode>(Offset))
2886 return false;
2887 }
2888
2889 AM = ISD::PRE_INC;
2890 return true;
2891}
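Pre-increment forms pay off when the address update and the access itself can
share one instruction (the lbzu/stwu family). The kind of source that
typically exercises this path, sketched in plain C++ (illustrative; actual
codegen depends on subtarget and flags):

void scaleAll(double *P, double K, int N) {
  for (int I = 0; I < N; ++I)
    P[I] *= K; // strided pointer walk: a pre-increment candidate
}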
2892
2893//===----------------------------------------------------------------------===//
2894// LowerOperation implementation
2895//===----------------------------------------------------------------------===//
2896
2897/// Return true if we should reference labels using a PICBase, set the HiOpFlags
2898/// and LoOpFlags to the target MO flags.
2899static void getLabelAccessInfo(bool IsPIC, const PPCSubtarget &Subtarget,
2900 unsigned &HiOpFlags, unsigned &LoOpFlags,
2901 const GlobalValue *GV = nullptr) {
2902 HiOpFlags = PPCII::MO_HA;
2903 LoOpFlags = PPCII::MO_LO;
2904
2905 // Don't use the pic base if not in PIC relocation model.
2906 if (IsPIC) {
2907 HiOpFlags |= PPCII::MO_PIC_FLAG;
2908 LoOpFlags |= PPCII::MO_PIC_FLAG;
2909 }
2910}
2911
2912static SDValue LowerLabelRef(SDValue HiPart, SDValue LoPart, bool isPIC,
2913 SelectionDAG &DAG) {
2914 SDLoc DL(HiPart);
2915 EVT PtrVT = HiPart.getValueType();
2916 SDValue Zero = DAG.getConstant(0, DL, PtrVT);
2917
2918 SDValue Hi = DAG.getNode(PPCISD::Hi, DL, PtrVT, HiPart, Zero);
2919 SDValue Lo = DAG.getNode(PPCISD::Lo, DL, PtrVT, LoPart, Zero);
2920
2921 // With PIC, the first instruction is actually "GR+hi(&G)".
2922 if (isPIC)
2923 Hi = DAG.getNode(ISD::ADD, DL, PtrVT,
2924 DAG.getNode(PPCISD::GlobalBaseReg, DL, PtrVT), Hi);
2925
2926 // Generate non-pic code that has direct accesses to the constant pool.
2927 // The address of the global is just (hi(&g)+lo(&g)).
2928 return DAG.getNode(ISD::ADD, DL, PtrVT, Hi, Lo);
2929}
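The Hi/Lo pair mirrors the usual @ha/@l address split: the low half is
consumed as a signed 16-bit displacement, so the high half is "high adjusted"
to absorb the borrow. A runnable sketch with an illustrative symbol address:

#include <cassert>
#include <cstdint>

void haLoDemo() {
  uint32_t G = 0x1234ABCD;               // address of some global
  int16_t Lo = (int16_t)(G & 0xFFFF);    // @l, sign-extended by the hardware
  uint32_t Ha = (G + 0x8000) >> 16;      // @ha, carry-compensated high half
  assert((Ha << 16) + (int32_t)Lo == G); // addis + d-form offset reach G
}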
2930
2931static void setUsesTOCBasePtr(MachineFunction &MF) {
2932 PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>();
2933 FuncInfo->setUsesTOCBasePtr();
2934}
2935
2936static void setUsesTOCBasePtr(SelectionDAG &DAG) {
2937 setUsesTOCBasePtr(DAG.getMachineFunction());
2938}
2939
2940SDValue PPCTargetLowering::getTOCEntry(SelectionDAG &DAG, const SDLoc &dl,
2941 SDValue GA) const {
2942 const bool Is64Bit = Subtarget.isPPC64();
2943 EVT VT = Is64Bit ? MVT::i64 : MVT::i32;
2944 SDValue Reg = Is64Bit ? DAG.getRegister(PPC::X2, VT)
2945 : Subtarget.isAIXABI()
2946 ? DAG.getRegister(PPC::R2, VT)
2947 : DAG.getNode(PPCISD::GlobalBaseReg, dl, VT);
2948 SDValue Ops[] = { GA, Reg };
2949 return DAG.getMemIntrinsicNode(
2950 PPCISD::TOC_ENTRY, dl, DAG.getVTList(VT, MVT::Other), Ops, VT,
2951 MachinePointerInfo::getGOT(DAG.getMachineFunction()), None,
2952 MachineMemOperand::MOLoad);
2953}
2954
2955SDValue PPCTargetLowering::LowerConstantPool(SDValue Op,
2956 SelectionDAG &DAG) const {
2957 EVT PtrVT = Op.getValueType();
2958 ConstantPoolSDNode *CP = cast<ConstantPoolSDNode>(Op);
2959 const Constant *C = CP->getConstVal();
2960
2961 // 64-bit SVR4 ABI and AIX ABI code are always position-independent.
2962 // The actual address of the GlobalValue is stored in the TOC.
2963 if (Subtarget.is64BitELFABI() || Subtarget.isAIXABI()) {
2964 if (Subtarget.isUsingPCRelativeCalls()) {
2965 SDLoc DL(CP);
2966 EVT Ty = getPointerTy(DAG.getDataLayout());
2967 SDValue ConstPool = DAG.getTargetConstantPool(
2968 C, Ty, CP->getAlign(), CP->getOffset(), PPCII::MO_PCREL_FLAG);
2969 return DAG.getNode(PPCISD::MAT_PCREL_ADDR, DL, Ty, ConstPool);
2970 }
2971 setUsesTOCBasePtr(DAG);
2972 SDValue GA = DAG.getTargetConstantPool(C, PtrVT, CP->getAlign(), 0);
2973 return getTOCEntry(DAG, SDLoc(CP), GA);
2974 }
2975
2976 unsigned MOHiFlag, MOLoFlag;
2977 bool IsPIC = isPositionIndependent();
2978 getLabelAccessInfo(IsPIC, Subtarget, MOHiFlag, MOLoFlag);
2979
2980 if (IsPIC && Subtarget.isSVR4ABI()) {
2981 SDValue GA =
2982 DAG.getTargetConstantPool(C, PtrVT, CP->getAlign(), PPCII::MO_PIC_FLAG);
2983 return getTOCEntry(DAG, SDLoc(CP), GA);
2984 }
2985
2986 SDValue CPIHi =
2987 DAG.getTargetConstantPool(C, PtrVT, CP->getAlign(), 0, MOHiFlag);
2988 SDValue CPILo =
2989 DAG.getTargetConstantPool(C, PtrVT, CP->getAlign(), 0, MOLoFlag);
2990 return LowerLabelRef(CPIHi, CPILo, IsPIC, DAG);
2991}
2992
2993// For 64-bit PowerPC, prefer the more compact relative encodings.
2994// This trades 32 bits per jump table entry for one or two instructions
2995// on the jump site.
2996unsigned PPCTargetLowering::getJumpTableEncoding() const {
2997 if (isJumpTableRelative())
2998 return MachineJumpTableInfo::EK_LabelDifference32;
2999
3000 return TargetLowering::getJumpTableEncoding();
3001}
3002
3003bool PPCTargetLowering::isJumpTableRelative() const {
3004 if (UseAbsoluteJumpTables)
3005 return false;
3006 if (Subtarget.isPPC64() || Subtarget.isAIXABI())
3007 return true;
3008 return TargetLowering::isJumpTableRelative();
3009}
3010
3011SDValue PPCTargetLowering::getPICJumpTableRelocBase(SDValue Table,
3012 SelectionDAG &DAG) const {
3013 if (!Subtarget.isPPC64() || Subtarget.isAIXABI())
3014 return TargetLowering::getPICJumpTableRelocBase(Table, DAG);
3015
3016 switch (getTargetMachine().getCodeModel()) {
3017 case CodeModel::Small:
3018 case CodeModel::Medium:
3019 return TargetLowering::getPICJumpTableRelocBase(Table, DAG);
3020 default:
3021 return DAG.getNode(PPCISD::GlobalBaseReg, SDLoc(),
3022 getPointerTy(DAG.getDataLayout()));
3023 }
3024}
3025
3026const MCExpr *
3027PPCTargetLowering::getPICJumpTableRelocBaseExpr(const MachineFunction *MF,
3028 unsigned JTI,
3029 MCContext &Ctx) const {
3030 if (!Subtarget.isPPC64() || Subtarget.isAIXABI())
3031 return TargetLowering::getPICJumpTableRelocBaseExpr(MF, JTI, Ctx);
3032
3033 switch (getTargetMachine().getCodeModel()) {
3034 case CodeModel::Small:
3035 case CodeModel::Medium:
3036 return TargetLowering::getPICJumpTableRelocBaseExpr(MF, JTI, Ctx);
3037 default:
3038 return MCSymbolRefExpr::create(MF->getPICBaseSymbol(), Ctx);
3039 }
3040}
3041
3042SDValue PPCTargetLowering::LowerJumpTable(SDValue Op, SelectionDAG &DAG) const {
3043 EVT PtrVT = Op.getValueType();
3044 JumpTableSDNode *JT = cast<JumpTableSDNode>(Op);
3045
3046 // isUsingPCRelativeCalls() returns true when PCRelative is enabled
3047 if (Subtarget.isUsingPCRelativeCalls()) {
3048 SDLoc DL(JT);
3049 EVT Ty = getPointerTy(DAG.getDataLayout());
3050 SDValue GA =
3051 DAG.getTargetJumpTable(JT->getIndex(), Ty, PPCII::MO_PCREL_FLAG);
3052 SDValue MatAddr = DAG.getNode(PPCISD::MAT_PCREL_ADDR, DL, Ty, GA);
3053 return MatAddr;
3054 }
3055
3056 // 64-bit SVR4 ABI and AIX ABI code are always position-independent.
3057 // The actual address of the GlobalValue is stored in the TOC.
3058 if (Subtarget.is64BitELFABI() || Subtarget.isAIXABI()) {
3059 setUsesTOCBasePtr(DAG);
3060 SDValue GA = DAG.getTargetJumpTable(JT->getIndex(), PtrVT);
3061 return getTOCEntry(DAG, SDLoc(JT), GA);
3062 }
3063
3064 unsigned MOHiFlag, MOLoFlag;
3065 bool IsPIC = isPositionIndependent();
3066 getLabelAccessInfo(IsPIC, Subtarget, MOHiFlag, MOLoFlag);
3067
3068 if (IsPIC && Subtarget.isSVR4ABI()) {
3069 SDValue GA = DAG.getTargetJumpTable(JT->getIndex(), PtrVT,
3070 PPCII::MO_PIC_FLAG);
3071 return getTOCEntry(DAG, SDLoc(GA), GA);
3072 }
3073
3074 SDValue JTIHi = DAG.getTargetJumpTable(JT->getIndex(), PtrVT, MOHiFlag);
3075 SDValue JTILo = DAG.getTargetJumpTable(JT->getIndex(), PtrVT, MOLoFlag);
3076 return LowerLabelRef(JTIHi, JTILo, IsPIC, DAG);
3077}
3078
3079SDValue PPCTargetLowering::LowerBlockAddress(SDValue Op,
3080 SelectionDAG &DAG) const {
3081 EVT PtrVT = Op.getValueType();
3082 BlockAddressSDNode *BASDN = cast<BlockAddressSDNode>(Op);
3083 const BlockAddress *BA = BASDN->getBlockAddress();
3084
3085 // isUsingPCRelativeCalls() returns true when PCRelative is enabled
3086 if (Subtarget.isUsingPCRelativeCalls()) {
3087 SDLoc DL(BASDN);
3088 EVT Ty = getPointerTy(DAG.getDataLayout());
3089 SDValue GA = DAG.getTargetBlockAddress(BA, Ty, BASDN->getOffset(),
3090 PPCII::MO_PCREL_FLAG);
3091 SDValue MatAddr = DAG.getNode(PPCISD::MAT_PCREL_ADDR, DL, Ty, GA);
3092 return MatAddr;
3093 }
3094
3095 // 64-bit SVR4 ABI and AIX ABI code are always position-independent.
3096 // The actual BlockAddress is stored in the TOC.
3097 if (Subtarget.is64BitELFABI() || Subtarget.isAIXABI()) {
3098 setUsesTOCBasePtr(DAG);
3099 SDValue GA = DAG.getTargetBlockAddress(BA, PtrVT, BASDN->getOffset());
3100 return getTOCEntry(DAG, SDLoc(BASDN), GA);
3101 }
3102
3103 // 32-bit position-independent ELF stores the BlockAddress in the .got.
3104 if (Subtarget.is32BitELFABI() && isPositionIndependent())
3105 return getTOCEntry(
3106 DAG, SDLoc(BASDN),
3107 DAG.getTargetBlockAddress(BA, PtrVT, BASDN->getOffset()));
3108
3109 unsigned MOHiFlag, MOLoFlag;
3110 bool IsPIC = isPositionIndependent();
3111 getLabelAccessInfo(IsPIC, Subtarget, MOHiFlag, MOLoFlag);
3112 SDValue TgtBAHi = DAG.getTargetBlockAddress(BA, PtrVT, 0, MOHiFlag);
3113 SDValue TgtBALo = DAG.getTargetBlockAddress(BA, PtrVT, 0, MOLoFlag);
3114 return LowerLabelRef(TgtBAHi, TgtBALo, IsPIC, DAG);
3115}
3116
3117SDValue PPCTargetLowering::LowerGlobalTLSAddress(SDValue Op,
3118 SelectionDAG &DAG) const {
3119 // FIXME: TLS addresses currently use medium model code sequences,
3120 // which is the most useful form. Eventually support for small and
3121 // large models could be added if users need it, at the cost of
3122 // additional complexity.
3123 GlobalAddressSDNode *GA = cast<GlobalAddressSDNode>(Op);
3124 if (DAG.getTarget().useEmulatedTLS())
3125 return LowerToTLSEmulatedModel(GA, DAG);
3126
3127 SDLoc dl(GA);
3128 const GlobalValue *GV = GA->getGlobal();
3129 EVT PtrVT = getPointerTy(DAG.getDataLayout());
3130 bool is64bit = Subtarget.isPPC64();
3131 const Module *M = DAG.getMachineFunction().getFunction().getParent();
3132 PICLevel::Level picLevel = M->getPICLevel();
3133
3135 TLSModel::Model Model = TM.getTLSModel(GV);
3136
3137 if (Model == TLSModel::LocalExec) {
3138 if (Subtarget.isUsingPCRelativeCalls()) {
3139 SDValue TLSReg = DAG.getRegister(PPC::X13, MVT::i64);
3140 SDValue TGA = DAG.getTargetGlobalAddress(
3141 GV, dl, PtrVT, 0, (PPCII::MO_PCREL_FLAG | PPCII::MO_TPREL_FLAG));
3142 SDValue MatAddr =
3143 DAG.getNode(PPCISD::TLS_LOCAL_EXEC_MAT_ADDR, dl, PtrVT, TGA);
3144 return DAG.getNode(PPCISD::ADD_TLS, dl, PtrVT, TLSReg, MatAddr);
3145 }
3146
3147 SDValue TGAHi = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0,
3148 PPCII::MO_TPREL_HA);
3149 SDValue TGALo = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0,
3150 PPCII::MO_TPREL_LO);
3151 SDValue TLSReg = is64bit ? DAG.getRegister(PPC::X13, MVT::i64)
3152 : DAG.getRegister(PPC::R2, MVT::i32);
3153
3154 SDValue Hi = DAG.getNode(PPCISD::Hi, dl, PtrVT, TGAHi, TLSReg);
3155 return DAG.getNode(PPCISD::Lo, dl, PtrVT, TGALo, Hi);
3156 }
3157
3158 if (Model == TLSModel::InitialExec) {
3159 bool IsPCRel = Subtarget.isUsingPCRelativeCalls();
3160 SDValue TGA = DAG.getTargetGlobalAddress(
3161 GV, dl, PtrVT, 0, IsPCRel ? PPCII::MO_GOT_TPREL_PCREL_FLAG : 0);
3162 SDValue TGATLS = DAG.getTargetGlobalAddress(
3163 GV, dl, PtrVT, 0,
3164 IsPCRel ? PPCII::MO_TLS_PCREL_FLAG : PPCII::MO_TLS);
3165 SDValue TPOffset;
3166 if (IsPCRel) {
3167 SDValue MatPCRel = DAG.getNode(PPCISD::MAT_PCREL_ADDR, dl, PtrVT, TGA);
3168 TPOffset = DAG.getLoad(MVT::i64, dl, DAG.getEntryNode(), MatPCRel,
3169 MachinePointerInfo());
3170 } else {
3171 SDValue GOTPtr;
3172 if (is64bit) {
3173 setUsesTOCBasePtr(DAG);
3174 SDValue GOTReg = DAG.getRegister(PPC::X2, MVT::i64);
3175 GOTPtr =
3176 DAG.getNode(PPCISD::ADDIS_GOT_TPREL_HA, dl, PtrVT, GOTReg, TGA);
3177 } else {
3178 if (!TM.isPositionIndependent())
3179 GOTPtr = DAG.getNode(PPCISD::PPC32_GOT, dl, PtrVT);
3180 else if (picLevel == PICLevel::SmallPIC)
3181 GOTPtr = DAG.getNode(PPCISD::GlobalBaseReg, dl, PtrVT);
3182 else
3183 GOTPtr = DAG.getNode(PPCISD::PPC32_PICGOT, dl, PtrVT);
3184 }
3185 TPOffset = DAG.getNode(PPCISD::LD_GOT_TPREL_L, dl, PtrVT, TGA, GOTPtr);
3186 }
3187 return DAG.getNode(PPCISD::ADD_TLS, dl, PtrVT, TPOffset, TGATLS);
3188 }
3189
3190 if (Model == TLSModel::GeneralDynamic) {
3191 if (Subtarget.isUsingPCRelativeCalls()) {
3192 SDValue TGA = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0,
3193 PPCII::MO_GOT_TLSGD_PCREL_FLAG);
3194 return DAG.getNode(PPCISD::TLS_DYNAMIC_MAT_PCREL_ADDR, dl, PtrVT, TGA);
3195 }
3196
3197 SDValue TGA = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0, 0);
3198 SDValue GOTPtr;
3199 if (is64bit) {
3200 setUsesTOCBasePtr(DAG);
3201 SDValue GOTReg = DAG.getRegister(PPC::X2, MVT::i64);
3202 GOTPtr = DAG.getNode(PPCISD::ADDIS_TLSGD_HA, dl, PtrVT,
3203 GOTReg, TGA);
3204 } else {
3205 if (picLevel == PICLevel::SmallPIC)
3206 GOTPtr = DAG.getNode(PPCISD::GlobalBaseReg, dl, PtrVT);
3207 else
3208 GOTPtr = DAG.getNode(PPCISD::PPC32_PICGOT, dl, PtrVT);
3209 }
3210 return DAG.getNode(PPCISD::ADDI_TLSGD_L_ADDR, dl, PtrVT,
3211 GOTPtr, TGA, TGA);
3212 }
3213
3214 if (Model == TLSModel::LocalDynamic) {
3215 if (Subtarget.isUsingPCRelativeCalls()) {
3216 SDValue TGA = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0,
3217 PPCII::MO_GOT_TLSLD_PCREL_FLAG);
3218 SDValue MatPCRel =
3219 DAG.getNode(PPCISD::TLS_DYNAMIC_MAT_PCREL_ADDR, dl, PtrVT, TGA);
3220 return DAG.getNode(PPCISD::PADDI_DTPREL, dl, PtrVT, MatPCRel, TGA);
3221 }
3222
3223 SDValue TGA = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0, 0);
3224 SDValue GOTPtr;
3225 if (is64bit) {
3226 setUsesTOCBasePtr(DAG);
3227 SDValue GOTReg = DAG.getRegister(PPC::X2, MVT::i64);
3228 GOTPtr = DAG.getNode(PPCISD::ADDIS_TLSLD_HA, dl, PtrVT,
3229 GOTReg, TGA);
3230 } else {
3231 if (picLevel == PICLevel::SmallPIC)
3232 GOTPtr = DAG.getNode(PPCISD::GlobalBaseReg, dl, PtrVT);
3233 else
3234 GOTPtr = DAG.getNode(PPCISD::PPC32_PICGOT, dl, PtrVT);
3235 }
3236 SDValue TLSAddr = DAG.getNode(PPCISD::ADDI_TLSLD_L_ADDR, dl,
3237 PtrVT, GOTPtr, TGA, TGA);
3238 SDValue DtvOffsetHi = DAG.getNode(PPCISD::ADDIS_DTPREL_HA, dl,
3239 PtrVT, TLSAddr, TGA);
3240 return DAG.getNode(PPCISD::ADDI_DTPREL_L, dl, PtrVT, DtvOffsetHi, TGA);
3241 }
3242
3243 llvm_unreachable("Unknown TLS model!");
3244}
3245
3246SDValue PPCTargetLowering::LowerGlobalAddress(SDValue Op,
3247 SelectionDAG &DAG) const {
3248 EVT PtrVT = Op.getValueType();
3249 GlobalAddressSDNode *GSDN = cast<GlobalAddressSDNode>(Op);
3250 SDLoc DL(GSDN);
3251 const GlobalValue *GV = GSDN->getGlobal();
3252
3253 // 64-bit SVR4 ABI & AIX ABI code is always position-independent.
3254 // The actual address of the GlobalValue is stored in the TOC.
3255 if (Subtarget.is64BitELFABI() || Subtarget.isAIXABI()) {
3256 if (Subtarget.isUsingPCRelativeCalls()) {
3257 EVT Ty = getPointerTy(DAG.getDataLayout());
3258 if (isAccessedAsGotIndirect(Op)) {
3259 SDValue GA = DAG.getTargetGlobalAddress(GV, DL, Ty, GSDN->getOffset(),
3260 PPCII::MO_PCREL_FLAG |
3261 PPCII::MO_GOT_FLAG);
3262 SDValue MatPCRel = DAG.getNode(PPCISD::MAT_PCREL_ADDR, DL, Ty, GA);
3263 SDValue Load = DAG.getLoad(MVT::i64, DL, DAG.getEntryNode(), MatPCRel,
3264 MachinePointerInfo());
3265 return Load;
3266 } else {
3267 SDValue GA = DAG.getTargetGlobalAddress(GV, DL, Ty, GSDN->getOffset(),
3268 PPCII::MO_PCREL_FLAG);
3269 return DAG.getNode(PPCISD::MAT_PCREL_ADDR, DL, Ty, GA);
3270 }
3271 }
3272 setUsesTOCBasePtr(DAG);
3273 SDValue GA = DAG.getTargetGlobalAddress(GV, DL, PtrVT, GSDN->getOffset());
3274 return getTOCEntry(DAG, DL, GA);
3275 }
3276
3277 unsigned MOHiFlag, MOLoFlag;
3278 bool IsPIC = isPositionIndependent();
3279 getLabelAccessInfo(IsPIC, Subtarget, MOHiFlag, MOLoFlag, GV);
3280
3281 if (IsPIC && Subtarget.isSVR4ABI()) {
3282 SDValue GA = DAG.getTargetGlobalAddress(GV, DL, PtrVT,
3283 GSDN->getOffset(),
3284 PPCII::MO_PIC_FLAG);
3285 return getTOCEntry(DAG, DL, GA);
3286 }
3287
3288 SDValue GAHi =
3289 DAG.getTargetGlobalAddress(GV, DL, PtrVT, GSDN->getOffset(), MOHiFlag);
3290 SDValue GALo =
3291 DAG.getTargetGlobalAddress(GV, DL, PtrVT, GSDN->getOffset(), MOLoFlag);
3292
3293 return LowerLabelRef(GAHi, GALo, IsPIC, DAG);
3294}
3295
3296SDValue PPCTargetLowering::LowerSETCC(SDValue Op, SelectionDAG &DAG) const {
3297 ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(2))->get();
3298 SDLoc dl(Op);
3299
3300 if (Op.getValueType() == MVT::v2i64) {
3301 // When the operands themselves are v2i64 values, we need to do something
3302 // special because VSX has no underlying comparison operations for these.
3303 if (Op.getOperand(0).getValueType() == MVT::v2i64) {
3304 // Equality can be handled by casting to the legal type for Altivec
3305 // comparisons, everything else needs to be expanded.
3306 if (CC == ISD::SETEQ || CC == ISD::SETNE) {
3307 return DAG.getNode(ISD::BITCAST, dl, MVT::v2i64,
3308 DAG.getSetCC(dl, MVT::v4i32,
3309 DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, Op.getOperand(0)),
3310 DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, Op.getOperand(1)),
3311 CC));
3312 }
3313
3314 return SDValue();
3315 }
3316
3317 // We handle most of these in the usual way.
3318 return Op;
3319 }
3320
3321 // If we're comparing for equality to zero, expose the fact that this is
3322 // implemented as a ctlz/srl pair on ppc, so that the dag combiner can
3323 // fold the new nodes.
3324 if (SDValue V = lowerCmpEqZeroToCtlzSrl(Op, DAG))
3325 return V;
3326
3327 if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op.getOperand(1))) {
3328 // Leave comparisons against 0 and -1 alone for now, since they're usually
3329 // optimized. FIXME: revisit this when we can custom lower all setcc
3330 // optimizations.
3331 if (C->isAllOnesValue() || C->isNullValue())
3332 return SDValue();
3333 }
3334
3335 // If we have an integer seteq/setne, turn it into a compare against zero
3336 // by xor'ing the rhs with the lhs, which is faster than setting a
3337 // condition register, reading it back out, and masking the correct bit. The
3338 // normal approach here uses sub to do this instead of xor. Using xor exposes
3339 // the result to other bit-twiddling opportunities.
3340 EVT LHSVT = Op.getOperand(0).getValueType();
3341 if (LHSVT.isInteger() && (CC == ISD::SETEQ || CC == ISD::SETNE)) {
3342 EVT VT = Op.getValueType();
3343 SDValue Sub = DAG.getNode(ISD::XOR, dl, LHSVT, Op.getOperand(0),
3344 Op.getOperand(1));
3345 return DAG.getSetCC(dl, VT, Sub, DAG.getConstant(0, dl, LHSVT), CC);
3346 }
3347 return SDValue();
3348}
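The cmp-eq-zero expansion referenced above, in scalar form: for 32-bit x,
(x == 0) equals (ctlz(x) >> 5), because 32 is the only leading-zero count with
bit 5 set; the xor rewrite then reduces any seteq to that shape. A hedged
sketch using GCC/Clang builtins (not the DAG expansion itself):

#include <cassert>
#include <cstdint>

static uint32_t isZero32(uint32_t X) {
  unsigned LZ = X ? __builtin_clz(X) : 32; // ctlz with the zero case pinned
  return LZ >> 5;                          // 1 iff X == 0
}

void setccDemo() {
  uint32_t A = 42, B = 42, C = 7;
  assert(isZero32(A ^ B) == 1); // seteq via xor + ctlz + srl
  assert(isZero32(A ^ C) == 0); // setne is the complement
}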
3349
3350SDValue PPCTargetLowering::LowerVAARG(SDValue Op, SelectionDAG &DAG) const {
3351 SDNode *Node = Op.getNode();
3352 EVT VT = Node->getValueType(0);
3353 EVT PtrVT = getPointerTy(DAG.getDataLayout());
3354 SDValue InChain = Node->getOperand(0);
3355 SDValue VAListPtr = Node->getOperand(1);
3356 const Value *SV = cast<SrcValueSDNode>(Node->getOperand(2))->getValue();
3357 SDLoc dl(Node);
3358
3359 assert(!Subtarget.isPPC64() && "LowerVAARG is PPC32 only");
3360
3361 // gpr_index
3362 SDValue GprIndex = DAG.getExtLoad(ISD::ZEXTLOAD, dl, MVT::i32, InChain,
3363 VAListPtr, MachinePointerInfo(SV), MVT::i8);
3364 InChain = GprIndex.getValue(1);
3365
3366 if (VT == MVT::i64) {
3367 // Check if GprIndex is even
3368 SDValue GprAnd = DAG.getNode(ISD::AND, dl, MVT::i32, GprIndex,
3369 DAG.getConstant(1, dl, MVT::i32));
3370 SDValue CC64 = DAG.getSetCC(dl, MVT::i32, GprAnd,
3371 DAG.getConstant(0, dl, MVT::i32), ISD::SETNE);
3372 SDValue GprIndexPlusOne = DAG.getNode(ISD::ADD, dl, MVT::i32, GprIndex,
3373 DAG.getConstant(1, dl, MVT::i32));
3374 // Align GprIndex to be even if it isn't
3375 GprIndex = DAG.getNode(ISD::SELECT, dl, MVT::i32, CC64, GprIndexPlusOne,
3376 GprIndex);
3377 }
3378
3379 // fpr index is 1 byte after gpr
3380 SDValue FprPtr = DAG.getNode(ISD::ADD, dl, PtrVT, VAListPtr,
3381 DAG.getConstant(1, dl, MVT::i32));
3382
3383 // fpr
3384 SDValue FprIndex = DAG.getExtLoad(ISD::ZEXTLOAD, dl, MVT::i32, InChain,
3385 FprPtr, MachinePointerInfo(SV), MVT::i8);
3386 InChain = FprIndex.getValue(1);
3387
3388 SDValue RegSaveAreaPtr = DAG.getNode(ISD::ADD, dl, PtrVT, VAListPtr,
3389 DAG.getConstant(8, dl, MVT::i32));
3390
3391 SDValue OverflowAreaPtr = DAG.getNode(ISD::ADD, dl, PtrVT, VAListPtr,
3392 DAG.getConstant(4, dl, MVT::i32));
3393
3394 // areas
3395 SDValue OverflowArea =
3396 DAG.getLoad(MVT::i32, dl, InChain, OverflowAreaPtr, MachinePointerInfo());
3397 InChain = OverflowArea.getValue(1);
3398
3399 SDValue RegSaveArea =
3400 DAG.getLoad(MVT::i32, dl, InChain, RegSaveAreaPtr, MachinePointerInfo());
3401 InChain = RegSaveArea.getValue(1);
3402
3403 // select overflow_area if index >= 8
3404 SDValue CC = DAG.getSetCC(dl, MVT::i32, VT.isInteger() ? GprIndex : FprIndex,
3405 DAG.getConstant(8, dl, MVT::i32), ISD::SETLT);
3406
3407 // adjustment constant gpr_index * 4/8
3408 SDValue RegConstant = DAG.getNode(ISD::MUL, dl, MVT::i32,
3409 VT.isInteger() ? GprIndex : FprIndex,
3410 DAG.getConstant(VT.isInteger() ? 4 : 8, dl,
3411 MVT::i32));
3412
3413 // OurReg = RegSaveArea + RegConstant
3414 SDValue OurReg = DAG.getNode(ISD::ADD, dl, PtrVT, RegSaveArea,
3415 RegConstant);
3416
3417 // Floating types are 32 bytes into RegSaveArea
3418 if (VT.isFloatingPoint())
3419 OurReg = DAG.getNode(ISD::ADD, dl, PtrVT, OurReg,
3420 DAG.getConstant(32, dl, MVT::i32));
3421
3422 // increase {f,g}pr_index by 1 (or 2 if VT is i64)
3423 SDValue IndexPlus1 = DAG.getNode(ISD::ADD, dl, MVT::i32,
3424 VT.isInteger() ? GprIndex : FprIndex,
3425 DAG.getConstant(VT == MVT::i64 ? 2 : 1, dl,
3426 MVT::i32));
3427
3428 InChain = DAG.getTruncStore(InChain, dl, IndexPlus1,
3429 VT.isInteger() ? VAListPtr : FprPtr,
3430 MachinePointerInfo(SV), MVT::i8);
3431
3432 // determine if we should load from reg_save_area or overflow_area
3433 SDValue Result = DAG.getNode(ISD::SELECT, dl, PtrVT, CC, OurReg, OverflowArea);
3434
3435 // increase overflow_area by 4/8 if gpr/fpr > 8
3436 SDValue OverflowAreaPlusN = DAG.getNode(ISD::ADD, dl, PtrVT, OverflowArea,
3437 DAG.getConstant(VT.isInteger() ? 4 : 8,
3438 dl, MVT::i32));
3439
3440 OverflowArea = DAG.getNode(ISD::SELECT, dl, MVT::i32, CC, OverflowArea,
3441 OverflowAreaPlusN);
3442
3443 InChain = DAG.getTruncStore(InChain, dl, OverflowArea, OverflowAreaPtr,
3444 MachinePointerInfo(), MVT::i32);
3445
3446 return DAG.getLoad(VT, dl, InChain, Result, MachinePointerInfo());
3447}
3448
3449SDValue PPCTargetLowering::LowerVACOPY(SDValue Op, SelectionDAG &DAG) const {
3450 assert(!Subtarget.isPPC64() && "LowerVACOPY is PPC32 only");
3451
3452 // We have to copy the entire va_list struct:
3453 // 2*sizeof(char) + 2 bytes of padding + 2*sizeof(char*) = 2+2+8 = 12 bytes
3454 return DAG.getMemcpy(Op.getOperand(0), Op, Op.getOperand(1), Op.getOperand(2),
3455 DAG.getConstant(12, SDLoc(Op), MVT::i32), Align(8),
3456 false, true, false, MachinePointerInfo(),
3457 MachinePointerInfo());
3458}
3459
3460SDValue PPCTargetLowering::LowerADJUST_TRAMPOLINE(SDValue Op,
3461 SelectionDAG &DAG) const {
3462 if (Subtarget.isAIXABI())
3463 report_fatal_error("ADJUST_TRAMPOLINE operation is not supported on AIX.");
3464
3465 return Op.getOperand(0);
3466}
3467
3468SDValue PPCTargetLowering::LowerINLINEASM(SDValue Op, SelectionDAG &DAG) const {
3469 MachineFunction &MF = DAG.getMachineFunction();
3470 PPCFunctionInfo &MFI = *MF.getInfo<PPCFunctionInfo>();
3471
3472 assert((Op.getOpcode() == ISD::INLINEASM ||
3473 Op.getOpcode() == ISD::INLINEASM_BR) &&
3474 "Expecting Inline ASM node.");
3475
3476 // If an LR store is already known to be required then there is no point in
3477 // checking this ASM as well.
3478 if (MFI.isLRStoreRequired())
3479 return Op;
3480
3481 // Inline ASM nodes have an optional last operand that is an incoming Flag of
3482 // type MVT::Glue. We want to ignore this last operand if that is the case.
3483 unsigned NumOps = Op.getNumOperands();
3484 if (Op.getOperand(NumOps - 1).getValueType() == MVT::Glue)
3485 --NumOps;
3486
3487 // Check all operands that may contain the LR.
3488 for (unsigned i = InlineAsm::Op_FirstOperand; i != NumOps;) {
3489 unsigned Flags = cast<ConstantSDNode>(Op.getOperand(i))->getZExtValue();
3490 unsigned NumVals = InlineAsm::getNumOperandRegisters(Flags);
3491 ++i; // Skip the ID value.
3492
3493 switch (InlineAsm::getKind(Flags)) {
3494 default:
3495 llvm_unreachable("Bad flags!");
3496 case InlineAsm::Kind_RegUse:
3497 case InlineAsm::Kind_Imm:
3498 case InlineAsm::Kind_Mem:
3499 i += NumVals;
3500 break;
3501 case InlineAsm::Kind_Clobber:
3502 case InlineAsm::Kind_RegDef:
3503 case InlineAsm::Kind_RegDefEarlyClobber: {
3504 for (; NumVals; --NumVals, ++i) {
3505 Register Reg = cast<RegisterSDNode>(Op.getOperand(i))->getReg();
3506 if (Reg != PPC::LR && Reg != PPC::LR8)
3507 continue;
3508 MFI.setLRStoreRequired();
3509 return Op;
3510 }
3511 break;
3512 }
3513 }
3514 }
3515
3516 return Op;
3517}
3518
3519SDValue PPCTargetLowering::LowerINIT_TRAMPOLINE(SDValue Op,
3520 SelectionDAG &DAG) const {
3521 if (Subtarget.isAIXABI())
3522 report_fatal_error("INIT_TRAMPOLINE operation is not supported on AIX.");
3523
3524 SDValue Chain = Op.getOperand(0);
3525 SDValue Trmp = Op.getOperand(1); // trampoline
3526 SDValue FPtr = Op.getOperand(2); // nested function
3527 SDValue Nest = Op.getOperand(3); // 'nest' parameter value
3528 SDLoc dl(Op);
3529
3530 EVT PtrVT = getPointerTy(DAG.getDataLayout());
3531 bool isPPC64 = (PtrVT == MVT::i64);
3532 Type *IntPtrTy = DAG.getDataLayout().getIntPtrType(*DAG.getContext());
3533
3536
3537 Entry.Ty = IntPtrTy;
3538 Entry.Node = Trmp; Args.push_back(Entry);
3539
3540 // TrampSize == (isPPC64 ? 48 : 40);
3541 Entry.Node = DAG.getConstant(isPPC64 ? 48 : 40, dl,
3542 isPPC64 ? MVT::i64 : MVT::i32);
3543 Args.push_back(Entry);
3544
3545 Entry.Node = FPtr; Args.push_back(Entry);
3546 Entry.Node = Nest; Args.push_back(Entry);
3547
3548 // Lower to a call to __trampoline_setup(Trmp, TrampSize, FPtr, ctx_reg)
3549 TargetLowering::CallLoweringInfo CLI(DAG);
3550 CLI.setDebugLoc(dl).setChain(Chain).setLibCallee(
3551 CallingConv::C, Type::getVoidTy(*DAG.getContext()),
3552 DAG.getExternalSymbol("__trampoline_setup", PtrVT), std::move(Args));
3553
3554 std::pair<SDValue, SDValue> CallResult = LowerCallTo(CLI);
3555 return CallResult.second;
3556}
3557
3558SDValue PPCTargetLowering::LowerVASTART(SDValue Op, SelectionDAG &DAG) const {
3559 MachineFunction &MF = DAG.getMachineFunction();
3560 PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>();
3561 EVT PtrVT = getPointerTy(MF.getDataLayout());
3562
3563 SDLoc dl(Op);
3564
3565 if (Subtarget.isPPC64() || Subtarget.isAIXABI()) {
3566 // vastart just stores the address of the VarArgsFrameIndex slot into the
3567 // memory location argument.
3568 SDValue FR = DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(), PtrVT);
3569 const Value *SV = cast<SrcValueSDNode>(Op.getOperand(2))->getValue();
3570 return DAG.getStore(Op.getOperand(0), dl, FR, Op.getOperand(1),
3571 MachinePointerInfo(SV));
3572 }
3573
3574 // For the 32-bit SVR4 ABI we follow the layout of the va_list struct.
3575 // We suppose the given va_list is already allocated.
3576 //
3577 // typedef struct {
3578 // char gpr; /* index into the array of 8 GPRs
3579 // * stored in the register save area
3580 // * gpr=0 corresponds to r3,
3581 // * gpr=1 to r4, etc.
3582 // */
3583 // char fpr; /* index into the array of 8 FPRs
3584 // * stored in the register save area
3585 // * fpr=0 corresponds to f1,
3586 // * fpr=1 to f2, etc.
3587 // */
3588 // char *overflow_arg_area;
3589 // /* location on stack that holds
3590 // * the next overflow argument
3591 // */
3592 // char *reg_save_area;
3593 // /* where r3:r10 and f1:f8 (if saved)
3594 // * are stored
3595 // */
3596 // } va_list[1];
3597
3598 SDValue ArgGPR = DAG.getConstant(FuncInfo->getVarArgsNumGPR(), dl, MVT::i32);
3599 SDValue ArgFPR = DAG.getConstant(FuncInfo->getVarArgsNumFPR(), dl, MVT::i32);
3600 SDValue StackOffsetFI = DAG.getFrameIndex(FuncInfo->getVarArgsStackOffset(),
3601 PtrVT);
3602 SDValue FR = DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(),
3603 PtrVT);
3604
3605 uint64_t FrameOffset = PtrVT.getSizeInBits()/8;
3606 SDValue ConstFrameOffset = DAG.getConstant(FrameOffset, dl, PtrVT);
3607
3608 uint64_t StackOffset = PtrVT.getSizeInBits()/8 - 1;
3609 SDValue ConstStackOffset = DAG.getConstant(StackOffset, dl, PtrVT);
3610
3611 uint64_t FPROffset = 1;
3612 SDValue ConstFPROffset = DAG.getConstant(FPROffset, dl, PtrVT);
3613
3614 const Value *SV = cast<SrcValueSDNode>(Op.getOperand(2))->getValue();
3615
3616 // Store first byte : number of int regs
3617 SDValue firstStore =
3618 DAG.getTruncStore(Op.getOperand(0), dl, ArgGPR, Op.getOperand(1),
3619 MachinePointerInfo(SV), MVT::i8);
3620 uint64_t nextOffset = FPROffset;
3621 SDValue nextPtr = DAG.getNode(ISD::ADD, dl, PtrVT, Op.getOperand(1),
3622 ConstFPROffset);
3623
3624 // Store second byte : number of float regs
3625 SDValue secondStore =
3626 DAG.getTruncStore(firstStore, dl, ArgFPR, nextPtr,
3627 MachinePointerInfo(SV, nextOffset), MVT::i8);
3628 nextOffset += StackOffset;
3629 nextPtr = DAG.getNode(ISD::ADD, dl, PtrVT, nextPtr, ConstStackOffset);
3630
3631 // Store second word : arguments given on stack
3632 SDValue thirdStore = DAG.getStore(secondStore, dl, StackOffsetFI, nextPtr,
3633 MachinePointerInfo(SV, nextOffset));
3634 nextOffset += FrameOffset;
3635 nextPtr = DAG.getNode(ISD::ADD, dl, PtrVT, nextPtr, ConstFrameOffset);
3636
3637 // Store third word : arguments given in registers
3638 return DAG.getStore(thirdStore, dl, FR, nextPtr,
3639 MachinePointerInfo(SV, nextOffset));
3640}
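Offset bookkeeping in the stores above, with PtrVT == i32 (so FrameOffset = 4,
StackOffset = 3, FPROffset = 1): gpr is stored at +0, fpr at +1,
overflow_arg_area at +4 (1 + 3), and reg_save_area at +8 (4 + 4), matching the
va_list layout in the comment.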
3641
3642/// FPR - The set of FP registers that should be allocated for arguments
3643/// on Darwin and AIX.
3644static const MCPhysReg FPR[] = {PPC::F1, PPC::F2, PPC::F3, PPC::F4, PPC::F5,
3645 PPC::F6, PPC::F7, PPC::F8, PPC::F9, PPC::F10,
3646 PPC::F11, PPC::F12, PPC::F13};
3647
3648/// CalculateStackSlotSize - Calculates the size reserved for this argument on
3649/// the stack.
3650static unsigned CalculateStackSlotSize(EVT ArgVT, ISD::ArgFlagsTy Flags,
3651 unsigned PtrByteSize) {
3652 unsigned ArgSize = ArgVT.getStoreSize();
3653 if (Flags.isByVal())
3654 ArgSize = Flags.getByValSize();
3655
3656 // Round up to multiples of the pointer size, except for array members,
3657 // which are always packed.
3658 if (!Flags.isInConsecutiveRegs())
3659 ArgSize = ((ArgSize + PtrByteSize - 1)/PtrByteSize) * PtrByteSize;
3660
3661 return ArgSize;
3662}
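The rounding here is the usual round-up-to-multiple idiom: with an 8-byte
pointer, sizes 1-8 map to 8 and 9-16 map to 16. A quick check with
illustrative values:

#include <cassert>

void stackSlotSizeDemo() {
  unsigned ArgSize = 13, PtrByteSize = 8;
  unsigned Rounded = ((ArgSize + PtrByteSize - 1) / PtrByteSize) * PtrByteSize;
  assert(Rounded == 16); // 13 rounds up to the next 8-byte multiple
}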
3663
3664/// CalculateStackSlotAlignment - Calculates the alignment of this argument
3665/// on the stack.
3666static Align CalculateStackSlotAlignment(EVT ArgVT, EVT OrigVT,
3667 ISD::ArgFlagsTy Flags,
3668 unsigned PtrByteSize) {
3669 Align Alignment(PtrByteSize);
3670
3671 // Altivec parameters are padded to a 16 byte boundary.
3672 if (ArgVT == MVT::v4f32 || ArgVT == MVT::v4i32 ||
3673 ArgVT == MVT::v8i16 || ArgVT == MVT::v16i8 ||
3674 ArgVT == MVT::v2f64 || ArgVT == MVT::v2i64 ||
3675 ArgVT == MVT::v1i128 || ArgVT == MVT::f128)
3676 Alignment = Align(16);
3677
3678 // ByVal parameters are aligned as requested.
3679 if (Flags.isByVal()) {
3680 auto BVAlign = Flags.getNonZeroByValAlign();
3681 if (BVAlign > PtrByteSize) {
3682 if (BVAlign.value() % PtrByteSize != 0)
3683 report_fatal_error(
3684 "ByVal alignment is not a multiple of the pointer size");
3685
3686 Alignment = BVAlign;
3687 }
3688 }
3689
3690 // Array members are always packed to their original alignment.
3691 if (Flags.isInConsecutiveRegs()) {
3692 // If the array member was split into multiple registers, the first
3693 // needs to be aligned to the size of the full type. (Except for
3694 // ppcf128, which is only aligned as its f64 components.)
3695 if (Flags.isSplit() && OrigVT != MVT::ppcf128)
3696 Alignment = Align(OrigVT.getStoreSize());
3697 else
3698 Alignment = Align(ArgVT.getStoreSize());
3699 }
3700
3701 return Alignment;
3702}
3703
3704/// CalculateStackSlotUsed - Return whether this argument will use its
3705/// stack slot (instead of being passed in registers). ArgOffset,
3706/// AvailableFPRs, and AvailableVRs must hold the current argument
3707/// position, and will be updated to account for this argument.
3708static bool CalculateStackSlotUsed(EVT ArgVT, EVT OrigVT, ISD::ArgFlagsTy Flags,
3709 unsigned PtrByteSize, unsigned LinkageSize,
3710 unsigned ParamAreaSize, unsigned &ArgOffset,
3711 unsigned &AvailableFPRs,
3712 unsigned &AvailableVRs) {
3713 bool UseMemory = false;
3714
3715 // Respect alignment of argument on the stack.
3716 Align Alignment =
3717 CalculateStackSlotAlignment(ArgVT, OrigVT, Flags, PtrByteSize);
3718 ArgOffset = alignTo(ArgOffset, Alignment);
3719 // If there's no space left in the argument save area, we must
3720 // use memory (this check also catches zero-sized arguments).
3721 if (ArgOffset >= LinkageSize + ParamAreaSize)
3722 UseMemory = true;
3723
3724 // Allocate argument on the stack.
3725 ArgOffset += CalculateStackSlotSize(ArgVT, Flags, PtrByteSize);
3726 if (Flags.isInConsecutiveRegsLast())
3727 ArgOffset = ((ArgOffset + PtrByteSize - 1)/PtrByteSize) * PtrByteSize;
3728 // If we overran the argument save area, we must use memory
3729 // (this check catches arguments passed partially in memory)
3730 if (ArgOffset > LinkageSize + ParamAreaSize)
3731 UseMemory = true;
3732
3733 // However, if the argument is actually passed in an FPR or a VR,
3734 // we don't use memory after all.
3735 if (!Flags.isByVal()) {
3736 if (ArgVT == MVT::f32 || ArgVT == MVT::f64)
3737 if (AvailableFPRs > 0) {
3738 --AvailableFPRs;
3739 return false;
3740 }
3741 if (ArgVT == MVT::v4f32 || ArgVT == MVT::v4i32 ||
3742 ArgVT == MVT::v8i16 || ArgVT == MVT::v16i8 ||
3743 ArgVT == MVT::v2f64 || ArgVT == MVT::v2i64 ||
3744 ArgVT == MVT::v1i128 || ArgVT == MVT::f128)
3745 if (AvailableVRs > 0) {
3746 --AvailableVRs;
3747 return false;
3748 }
3749 }
3750
3751 return UseMemory;
3752}
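Worked example for the accounting above, using illustrative 64-bit ELFv2
numbers (LinkageSize 32, ParamAreaSize 64, i.e. eight 8-byte GPR slots): a
ninth pointer-sized argument starts at offset 32 + 64 = 96, which is not below
96, so it uses memory; an f64 at the same position would still return false
here as long as AvailableFPRs > 0.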
3753
3754/// EnsureStackAlignment - Round stack frame size up from NumBytes to
3755/// ensure minimum alignment required for the target.
3756static unsigned EnsureStackAlignment(const PPCFrameLowering *Lowering,
3757 unsigned NumBytes) {
3758 return alignTo(NumBytes, Lowering->getStackAlign());
3759}
3760
3761SDValue PPCTargetLowering::LowerFormalArguments(
3762 SDValue Chain, CallingConv::ID CallConv, bool isVarArg,
3763 const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &dl,
3764 SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const {
3765 if (Subtarget.isAIXABI())
3766 return LowerFormalArguments_AIX(Chain, CallConv, isVarArg, Ins, dl, DAG,
3767 InVals);
3768 if (Subtarget.is64BitELFABI())
3769 return LowerFormalArguments_64SVR4(Chain, CallConv, isVarArg, Ins, dl, DAG,
3770 InVals);
3771 assert(Subtarget.is32BitELFABI());
3772 return LowerFormalArguments_32SVR4(Chain, CallConv, isVarArg, Ins, dl, DAG,
3773 InVals);
3774}
3775
3776SDValue PPCTargetLowering::LowerFormalArguments_32SVR4(
3777 SDValue Chain, CallingConv::ID CallConv, bool isVarArg,
3778 const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &dl,
3779 SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const {
3780
3781 // 32-bit SVR4 ABI Stack Frame Layout:
3782 // +-----------------------------------+
3783 // +--> | Back chain |
3784 // | +-----------------------------------+
3785 // | | Floating-point register save area |
3786 // | +-----------------------------------+
3787 // | | General register save area |
3788 // | +-----------------------------------+
3789 // | | CR save word |
3790 // | +-----------------------------------+
3791 // | | VRSAVE save word |
3792 // | +-----------------------------------+
3793 // | | Alignment padding |
3794 // | +-----------------------------------+
3795 // | | Vector register save area |
3796 // | +-----------------------------------+
3797 // | | Local variable space |
3798 // | +-----------------------------------+
3799 // | | Parameter list area |
3800 // | +-----------------------------------+
3801 // | | LR save word |
3802 // | +-----------------------------------+
3803 // SP--> +--- | Back chain |
3804 // +-----------------------------------+
3805 //
3806 // Specifications:
3807 // System V Application Binary Interface PowerPC Processor Supplement
3808 // AltiVec Technology Programming Interface Manual
3809
3810 MachineFunction &MF = DAG.getMachineFunction();
3811 MachineFrameInfo &MFI = MF.getFrameInfo();
3812 PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>();
3813
3814 EVT PtrVT = getPointerTy(MF.getDataLayout());
3815 // Potential tail calls could cause overwriting of argument stack slots.
3816 bool isImmutable = !(getTargetMachine().Options.GuaranteedTailCallOpt &&
3817 (CallConv == CallingConv::Fast));
3818 const Align PtrAlign(4);
3819
3820 // Assign locations to all of the incoming arguments.
3821 SmallVector<CCValAssign, 16> ArgLocs;
3822 PPCCCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(), ArgLocs,
3823 *DAG.getContext());
3824
3825 // Reserve space for the linkage area on the stack.
3826 unsigned LinkageSize = Subtarget.getFrameLowering()->getLinkageSize();
3827 CCInfo.AllocateStack(LinkageSize, PtrAlign);
3828 if (useSoftFloat())
3829 CCInfo.PreAnalyzeFormalArguments(Ins);
3830
3831 CCInfo.AnalyzeFormalArguments(Ins, CC_PPC32_SVR4);
3832 CCInfo.clearWasPPCF128();
3833
3834 for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
3835 CCValAssign &VA = ArgLocs[i];
3836
3837 // Arguments stored in registers.
3838 if (VA.isRegLoc()) {
3839 const TargetRegisterClass *RC;
3840 EVT ValVT = VA.getValVT();
3841
3842 switch (ValVT.getSimpleVT().SimpleTy) {
3843 default:
3844 llvm_unreachable("ValVT not supported by formal arguments Lowering");
3845 case MVT::i1:
3846 case MVT::i32:
3847 RC = &PPC::GPRCRegClass;
3848 break;
3849 case MVT::f32:
3850 if (Subtarget.hasP8Vector())
3851 RC = &PPC::VSSRCRegClass;
3852 else if (Subtarget.hasSPE())
3853 RC = &PPC::GPRCRegClass;
3854 else
3855 RC = &PPC::F4RCRegClass;
3856 break;
3857 case MVT::f64:
3858 if (Subtarget.hasVSX())
3859 RC = &PPC::VSFRCRegClass;
3860 else if (Subtarget.hasSPE())
3861 // SPE passes doubles in GPR pairs.
3862 RC = &PPC::GPRCRegClass;
3863 else
3864 RC = &PPC::F8RCRegClass;
3865 break;
3866 case MVT::v16i8:
3867 case MVT::v8i16:
3868 case MVT::v4i32:
3869 RC = &PPC::VRRCRegClass;
3870 break;
3871 case MVT::v4f32:
3872 RC = &PPC::VRRCRegClass;
3873 break;
3874 case MVT::v2f64:
3875 case MVT::v2i64:
3876 RC = &PPC::VRRCRegClass;
3877 break;
3878 }
3879
3880 SDValue ArgValue;
3881 // Transform the arguments stored in physical registers into
3882 // virtual ones.
3883 if (VA.getLocVT() == MVT::f64 && Subtarget.hasSPE()) {
3884 assert(i + 1 < e && "No second half of double precision argument");
3885 unsigned RegLo = MF.addLiveIn(VA.getLocReg(), RC);
3886 unsigned RegHi = MF.addLiveIn(ArgLocs[++i].getLocReg(), RC);
3887 SDValue ArgValueLo = DAG.getCopyFromReg(Chain, dl, RegLo, MVT::i32);
3888 SDValue ArgValueHi = DAG.getCopyFromReg(Chain, dl, RegHi, MVT::i32);
3889 if (!Subtarget.isLittleEndian())
3890 std::swap (ArgValueLo, ArgValueHi);
3891 ArgValue = DAG.getNode(PPCISD::BUILD_SPE64, dl, MVT::f64, ArgValueLo,
3892 ArgValueHi);
3893 } else {
3894 unsigned Reg = MF.addLiveIn(VA.getLocReg(), RC);
3895 ArgValue = DAG.getCopyFromReg(Chain, dl, Reg,
3896 ValVT == MVT::i1 ? MVT::i32 : ValVT);
3897 if (ValVT == MVT::i1)
3898 ArgValue = DAG.getNode(ISD::TRUNCATE, dl, MVT::i1, ArgValue);
3899 }
3900
3901 InVals.push_back(ArgValue);
3902 } else {
3903 // Argument stored in memory.
3904 assert(VA.isMemLoc());
3905
3906 // Get the extended size of the argument type on the stack
3907 unsigned ArgSize = VA.getLocVT().getStoreSize();
3908 // Get the actual size of the argument type
3909 unsigned ObjSize = VA.getValVT().getStoreSize();
3910 unsigned ArgOffset = VA.getLocMemOffset();
3911 // Stack objects in PPC32 are right justified.
3912 ArgOffset += ArgSize - ObjSize;
3913 int FI = MFI.CreateFixedObject(ArgSize, ArgOffset, isImmutable);
3914
3915 // Create load nodes to retrieve arguments from the stack.
3916 SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
3917 InVals.push_back(
3918 DAG.getLoad(VA.getValVT(), dl, Chain, FIN, MachinePointerInfo()));
3919 }
3920 }
3921
3922 // Assign locations to all of the incoming aggregate by value arguments.
3923 // Aggregates passed by value are stored in the local variable space of the
3924 // caller's stack frame, right above the parameter list area.
3925 SmallVector<CCValAssign, 16> ByValArgLocs;
3926 CCState CCByValInfo(CallConv, isVarArg, DAG.getMachineFunction(),
3927 ByValArgLocs, *DAG.getContext());
3928
3929 // Reserve stack space for the allocations in CCInfo.
3930 CCByValInfo.AllocateStack(CCInfo.getNextStackOffset(), PtrAlign);
3931
3932 CCByValInfo.AnalyzeFormalArguments(Ins, CC_PPC32_SVR4_ByVal);
3933
3934 // Area that is at least reserved in the caller of this function.
3935 unsigned MinReservedArea = CCByValInfo.getNextStackOffset();
3936 MinReservedArea = std::max(MinReservedArea, LinkageSize);
3937
3938 // Set the size that is at least reserved in caller of this function. Tail
3939 // call optimized function's reserved stack space needs to be aligned so that
3940 // taking the difference between two stack areas will result in an aligned
3941 // stack.
3942 MinReservedArea =
3943 EnsureStackAlignment(Subtarget.getFrameLowering(), MinReservedArea);
3944 FuncInfo->setMinReservedArea(MinReservedArea);
3945
3946 SmallVector<SDValue, 8> MemOps;
3947 
3948 // If the function takes variable number of arguments, make a frame index for
3949 // the start of the first vararg value... for expansion of llvm.va_start.
3950 if (isVarArg) {
3951 static const MCPhysReg GPArgRegs[] = {
3952 PPC::R3, PPC::R4, PPC::R5, PPC::R6,
3953 PPC::R7, PPC::R8, PPC::R9, PPC::R10,
3954 };
3955 const unsigned NumGPArgRegs = array_lengthof(GPArgRegs);
3956
3957 static const MCPhysReg FPArgRegs[] = {
3958 PPC::F1, PPC::F2, PPC::F3, PPC::F4, PPC::F5, PPC::F6, PPC::F7,
3959 PPC::F8
3960 };
3961 unsigned NumFPArgRegs = array_lengthof(FPArgRegs);
3962
3963 if (useSoftFloat() || hasSPE())
3964 NumFPArgRegs = 0;
3965
3966 FuncInfo->setVarArgsNumGPR(CCInfo.getFirstUnallocated(GPArgRegs));
3967 FuncInfo->setVarArgsNumFPR(CCInfo.getFirstUnallocated(FPArgRegs));
3968
3969 // Make room for NumGPArgRegs and NumFPArgRegs.
3970 int Depth = NumGPArgRegs * PtrVT.getSizeInBits()/8 +
3971 NumFPArgRegs * MVT(MVT::f64).getSizeInBits()/8;
3972
3973 FuncInfo->setVarArgsStackOffset(
3974 MFI.CreateFixedObject(PtrVT.getSizeInBits()/8,
3975 CCInfo.getNextStackOffset(), true));
3976
3977 FuncInfo->setVarArgsFrameIndex(
3978 MFI.CreateStackObject(Depth, Align(8), false));
3979 SDValue FIN = DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(), PtrVT);
3980
3981 // The fixed integer arguments of a variadic function are stored to the
3982 // VarArgsFrameIndex on the stack so that they may be loaded by
3983 // dereferencing the result of va_next.
3984 for (unsigned GPRIndex = 0; GPRIndex != NumGPArgRegs; ++GPRIndex) {
3985 // Get an existing live-in vreg, or add a new one.
3986 unsigned VReg = MF.getRegInfo().getLiveInVirtReg(GPArgRegs[GPRIndex]);
3987 if (!VReg)
3988 VReg = MF.addLiveIn(GPArgRegs[GPRIndex], &PPC::GPRCRegClass);
3989
3990 SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, PtrVT);
3991 SDValue Store =
3992 DAG.getStore(Val.getValue(1), dl, Val, FIN, MachinePointerInfo());
3993 MemOps.push_back(Store);
3994 // Increment the address by four for the next argument to store
3995 SDValue PtrOff = DAG.getConstant(PtrVT.getSizeInBits()/8, dl, PtrVT);
3996 FIN = DAG.getNode(ISD::ADD, dl, PtrOff.getValueType(), FIN, PtrOff);
3997 }
3998
3999 // FIXME 32-bit SVR4: We only need to save FP argument registers if CR bit 6
4000 // is set.
4001 // The double arguments are stored to the VarArgsFrameIndex
4002 // on the stack.
4003 for (unsigned FPRIndex = 0; FPRIndex != NumFPArgRegs; ++FPRIndex) {
4004 // Get an existing live-in vreg, or add a new one.
4005 unsigned VReg = MF.getRegInfo().getLiveInVirtReg(FPArgRegs[FPRIndex]);
4006 if (!VReg)
4007 VReg = MF.addLiveIn(FPArgRegs[FPRIndex], &PPC::F8RCRegClass);
4008
4009 SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, MVT::f64);
4010 SDValue Store =
4011 DAG.getStore(Val.getValue(1), dl, Val, FIN, MachinePointerInfo());
4012 MemOps.push_back(Store);
4013 // Increment the address by eight for the next argument to store
4014 SDValue PtrOff = DAG.getConstant(MVT(MVT::f64).getSizeInBits()/8, dl,
4015 PtrVT);
4016 FIN = DAG.getNode(ISD::ADD, dl, PtrOff.getValueType(), FIN, PtrOff);
4017 }
4018 }
4019
4020 if (!MemOps.empty())
4021 Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOps);
4022
4023 return Chain;
4024}
4025
4026// PPC64 passes i8, i16, and i32 values in i64 registers. Promote
4027// value to MVT::i64 and then truncate to the correct register size.
4028SDValue PPCTargetLowering::extendArgForPPC64(ISD::ArgFlagsTy Flags,
4029 EVT ObjectVT, SelectionDAG &DAG,
4030 SDValue ArgVal,
4031 const SDLoc &dl) const {
4032 if (Flags.isSExt())
4033 ArgVal = DAG.getNode(ISD::AssertSext, dl, MVT::i64, ArgVal,
4034 DAG.getValueType(ObjectVT));
4035 else if (Flags.isZExt())
4036 ArgVal = DAG.getNode(ISD::AssertZext, dl, MVT::i64, ArgVal,
4037 DAG.getValueType(ObjectVT));
4038
4039 return DAG.getNode(ISD::TRUNCATE, dl, ObjectVT, ArgVal);
4040}
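// Editorial illustration (not part of the original source): for an incoming
// 'signext i32' argument on PPC64, the nodes built above take roughly the
// shape
//   t1: i64 = CopyFromReg ...            ; value arrives in a 64-bit GPR
//   t2: i64 = AssertSext t1, ValueType:i32
//   t3: i32 = truncate t2
// so later combines may rely on the upper 32 bits being a sign extension.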
4041
4042SDValue PPCTargetLowering::LowerFormalArguments_64SVR4(
4043 SDValue Chain, CallingConv::ID CallConv, bool isVarArg,
4044 const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &dl,
4045 SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const {
4046 // TODO: add description of PPC stack frame format, or at least some docs.
4047 //
4048 bool isELFv2ABI = Subtarget.isELFv2ABI();
4049 bool isLittleEndian = Subtarget.isLittleEndian();
4050 MachineFunction &MF = DAG.getMachineFunction();
4051 MachineFrameInfo &MFI = MF.getFrameInfo();
4052 PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>();
4053
4054 assert(!(CallConv == CallingConv::Fast && isVarArg) &&
4055 "fastcc not supported on varargs functions");
4056
4057 EVT PtrVT = getPointerTy(MF.getDataLayout());
4058 // Potential tail calls could cause overwriting of argument stack slots.
4059 bool isImmutable = !(getTargetMachine().Options.GuaranteedTailCallOpt &&
4060 (CallConv == CallingConv::Fast));
4061 unsigned PtrByteSize = 8;
4062 unsigned LinkageSize = Subtarget.getFrameLowering()->getLinkageSize();
4063
4064 static const MCPhysReg GPR[] = {
4065 PPC::X3, PPC::X4, PPC::X5, PPC::X6,
4066 PPC::X7, PPC::X8, PPC::X9, PPC::X10,
4067 };
4068 static const MCPhysReg VR[] = {
4069 PPC::V2, PPC::V3, PPC::V4, PPC::V5, PPC::V6, PPC::V7, PPC::V8,
4070 PPC::V9, PPC::V10, PPC::V11, PPC::V12, PPC::V13
4071 };
4072
4073 const unsigned Num_GPR_Regs = array_lengthof(GPR);
4074 const unsigned Num_FPR_Regs = useSoftFloat() ? 0 : 13;
4075 const unsigned Num_VR_Regs = array_lengthof(VR);
4076
4077 // Do a first pass over the arguments to determine whether the ABI
4078 // guarantees that our caller has allocated the parameter save area
4079 // on its stack frame. In the ELFv1 ABI, this is always the case;
4080 // in the ELFv2 ABI, it is true if this is a vararg function or if
4081 // any parameter is located in a stack slot.
4082
4083 bool HasParameterArea = !isELFv2ABI || isVarArg;
4084 unsigned ParamAreaSize = Num_GPR_Regs * PtrByteSize;
4085 unsigned NumBytes = LinkageSize;
4086 unsigned AvailableFPRs = Num_FPR_Regs;
4087 unsigned AvailableVRs = Num_VR_Regs;
4088 for (unsigned i = 0, e = Ins.size(); i != e; ++i) {
4089 if (Ins[i].Flags.isNest())
4090 continue;
4091
4092 if (CalculateStackSlotUsed(Ins[i].VT, Ins[i].ArgVT, Ins[i].Flags,
4093 PtrByteSize, LinkageSize, ParamAreaSize,
4094 NumBytes, AvailableFPRs, AvailableVRs))
4095 HasParameterArea = true;
4096 }
4097
4098 // Add DAG nodes to load the arguments or copy them out of registers. On
4099 // entry to a function on PPC, the arguments start after the linkage area,
4100 // although the first ones are often in registers.
4101
4102 unsigned ArgOffset = LinkageSize;
4103 unsigned GPR_idx = 0, FPR_idx = 0, VR_idx = 0;
4104 SmallVector<SDValue, 8> MemOps;
4105 Function::const_arg_iterator FuncArg = MF.getFunction().arg_begin();
4106 unsigned CurArgIdx = 0;
4107 for (unsigned ArgNo = 0, e = Ins.size(); ArgNo != e; ++ArgNo) {
4108 SDValue ArgVal;
4109 bool needsLoad = false;
4110 EVT ObjectVT = Ins[ArgNo].VT;
4111 EVT OrigVT = Ins[ArgNo].ArgVT;
4112 unsigned ObjSize = ObjectVT.getStoreSize();
4113 unsigned ArgSize = ObjSize;
4114 ISD::ArgFlagsTy Flags = Ins[ArgNo].Flags;
4115 if (Ins[ArgNo].isOrigArg()) {
4116 std::advance(FuncArg, Ins[ArgNo].getOrigArgIndex() - CurArgIdx);
4117 CurArgIdx = Ins[ArgNo].getOrigArgIndex();
4118 }
4119 // We re-align the argument offset for each argument, except when using the
4120 // fast calling convention, when we need to make sure we do that only when
4121 // we'll actually use a stack slot.
4122 unsigned CurArgOffset;
4123 Align Alignment;
4124 auto ComputeArgOffset = [&]() {
4125 /* Respect alignment of argument on the stack. */
4126 Alignment =
4127 CalculateStackSlotAlignment(ObjectVT, OrigVT, Flags, PtrByteSize);
4128 ArgOffset = alignTo(ArgOffset, Alignment);
4129 CurArgOffset = ArgOffset;
4130 };
4131
4132 if (CallConv != CallingConv::Fast) {
4133 ComputeArgOffset();
4134
4135 /* Compute GPR index associated with argument offset. */
4136 GPR_idx = (ArgOffset - LinkageSize) / PtrByteSize;
4137 GPR_idx = std::min(GPR_idx, Num_GPR_Regs);
4138 }
4139
4140 // FIXME the codegen can be much improved in some cases.
4141 // We do not have to keep everything in memory.
4142 if (Flags.isByVal()) {
4143 assert(Ins[ArgNo].isOrigArg() && "Byval arguments cannot be implicit");
4144
4145 if (CallConv == CallingConv::Fast)
4146 ComputeArgOffset();
4147
4148 // ObjSize is the true size; ArgSize is ObjSize rounded up to a multiple of registers.
4149 ObjSize = Flags.getByValSize();
4150 ArgSize = ((ObjSize + PtrByteSize - 1)/PtrByteSize) * PtrByteSize;
4151 // Empty aggregate parameters do not take up registers. Examples:
4152 // struct { } a;
4153 // union { } b;
4154 // int c[0];
4155 // etc. However, we have to provide a place-holder in InVals, so
4156 // pretend we have an 8-byte item at the current address for that
4157 // purpose.
4158 if (!ObjSize) {
4159 int FI = MFI.CreateFixedObject(PtrByteSize, ArgOffset, true);
4160 SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
4161 InVals.push_back(FIN);
4162 continue;
4163 }
4164
4165 // Create a stack object covering all stack doublewords occupied
4166 // by the argument. If the argument is (fully or partially) on
4167 // the stack, or if the argument is fully in registers but the
4168 // caller has allocated the parameter save area anyway, we can refer
4169 // directly to the caller's stack frame. Otherwise, create a
4170 // local copy in our own frame.
4171 int FI;
4172 if (HasParameterArea ||
4173 ArgSize + ArgOffset > LinkageSize + Num_GPR_Regs * PtrByteSize)
4174 FI = MFI.CreateFixedObject(ArgSize, ArgOffset, false, true);
4175 else
4176 FI = MFI.CreateStackObject(ArgSize, Alignment, false);
4177 SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
4178
4179 // Handle aggregates smaller than 8 bytes.
4180 if (ObjSize < PtrByteSize) {
4181 // The value of the object is its address, which differs from the
4182 // address of the enclosing doubleword on big-endian systems.
4183 SDValue Arg = FIN;
4184 if (!isLittleEndian) {
4185 SDValue ArgOff = DAG.getConstant(PtrByteSize - ObjSize, dl, PtrVT);
4186 Arg = DAG.getNode(ISD::ADD, dl, ArgOff.getValueType(), Arg, ArgOff);
4187 }
4188 InVals.push_back(Arg);
4189
4190 if (GPR_idx != Num_GPR_Regs) {
4191 unsigned VReg = MF.addLiveIn(GPR[GPR_idx++], &PPC::G8RCRegClass);
4192 FuncInfo->addLiveInAttr(VReg, Flags);
4193 SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, PtrVT);
4194 SDValue Store;
4195
4196 if (ObjSize==1 || ObjSize==2 || ObjSize==4) {
4197 EVT ObjType = (ObjSize == 1 ? MVT::i8 :
4198 (ObjSize == 2 ? MVT::i16 : MVT::i32));
4199 Store = DAG.getTruncStore(Val.getValue(1), dl, Val, Arg,
4200 MachinePointerInfo(&*FuncArg), ObjType);
4201 } else {
4202 // For sizes that don't fit a truncating store (3, 5, 6, 7),
4203 // store the whole register as-is to the parameter save area
4204 // slot.
4205 Store = DAG.getStore(Val.getValue(1), dl, Val, FIN,
4206 MachinePointerInfo(&*FuncArg));
4207 }
4208
4209 MemOps.push_back(Store);
4210 }
4211 // Whether we copied from a register or not, advance the offset
4212 // into the parameter save area by a full doubleword.
4213 ArgOffset += PtrByteSize;
4214 continue;
4215 }
4216
4217 // The value of the object is its address, which is the address of
4218 // its first stack doubleword.
4219 InVals.push_back(FIN);
4220
4221 // Store whatever pieces of the object are in registers to memory.
4222 for (unsigned j = 0; j < ArgSize; j += PtrByteSize) {
4223 if (GPR_idx == Num_GPR_Regs)
4224 break;
4225
4226 unsigned VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::G8RCRegClass);
4227 FuncInfo->addLiveInAttr(VReg, Flags);
4228 SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, PtrVT);
4229 SDValue Addr = FIN;
4230 if (j) {
4231 SDValue Off = DAG.getConstant(j, dl, PtrVT);
4232 Addr = DAG.getNode(ISD::ADD, dl, Off.getValueType(), Addr, Off);
4233 }
4234 SDValue Store = DAG.getStore(Val.getValue(1), dl, Val, Addr,
4235 MachinePointerInfo(&*FuncArg, j));
4236 MemOps.push_back(Store);
4237 ++GPR_idx;
4238 }
4239 ArgOffset += ArgSize;
4240 continue;
4241 }
4242
4243 switch (ObjectVT.getSimpleVT().SimpleTy) {
4244 default: llvm_unreachable("Unhandled argument type!");
4245 case MVT::i1:
4246 case MVT::i32:
4247 case MVT::i64:
4248 if (Flags.isNest()) {
4249 // The 'nest' parameter, if any, is passed in R11.
4250 unsigned VReg = MF.addLiveIn(PPC::X11, &PPC::G8RCRegClass);
4251 ArgVal = DAG.getCopyFromReg(Chain, dl, VReg, MVT::i64);
4252
4253 if (ObjectVT == MVT::i32 || ObjectVT == MVT::i1)
4254 ArgVal = extendArgForPPC64(Flags, ObjectVT, DAG, ArgVal, dl);
4255
4256 break;
4257 }
4258
4259 // These can be scalar arguments or elements of an integer array type
4260 // passed directly. Clang may use those instead of "byval" aggregate
4261 // types to avoid forcing arguments to memory unnecessarily.
4262 if (GPR_idx != Num_GPR_Regs) {
4263 unsigned VReg = MF.addLiveIn(GPR[GPR_idx++], &PPC::G8RCRegClass);
4264 FuncInfo->addLiveInAttr(VReg, Flags);
4265 ArgVal = DAG.getCopyFromReg(Chain, dl, VReg, MVT::i64);
4266
4267 if (ObjectVT == MVT::i32 || ObjectVT == MVT::i1)
4268 // PPC64 passes i8, i16, and i32 values in i64 registers. Promote
4269 // value to MVT::i64 and then truncate to the correct register size.
4270 ArgVal = extendArgForPPC64(Flags, ObjectVT, DAG, ArgVal, dl);
4271 } else {
4272 if (CallConv == CallingConv::Fast)
4273 ComputeArgOffset();
4274
4275 needsLoad = true;
4276 ArgSize = PtrByteSize;
4277 }
4278 if (CallConv != CallingConv::Fast || needsLoad)
4279 ArgOffset += 8;
4280 break;
4281
4282 case MVT::f32:
4283 case MVT::f64:
4284 // These can be scalar arguments or elements of a float array type
4285 // passed directly. The latter are used to implement ELFv2 homogeneous
4286 // float aggregates.
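// Editorial illustration (assumed C source, not from this file): a
// homogeneous float aggregate such as
//   struct Hfa { float x, y, z; };
// is passed one member per FPR (f1..f3 if it is the first argument), while
// its members still occupy consecutive argument-area space as handled below.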
4287 if (FPR_idx != Num_FPR_Regs) {
4288 unsigned VReg;
4289
4290 if (ObjectVT == MVT::f32)
4291 VReg = MF.addLiveIn(FPR[FPR_idx],
4292 Subtarget.hasP8Vector()
4293 ? &PPC::VSSRCRegClass
4294 : &PPC::F4RCRegClass);
4295 else
4296 VReg = MF.addLiveIn(FPR[FPR_idx], Subtarget.hasVSX()
4297 ? &PPC::VSFRCRegClass
4298 : &PPC::F8RCRegClass);
4299
4300 ArgVal = DAG.getCopyFromReg(Chain, dl, VReg, ObjectVT);
4301 ++FPR_idx;
4302 } else if (GPR_idx != Num_GPR_Regs && CallConv != CallingConv::Fast) {
4303 // FIXME: We may want to re-enable this for CallingConv::Fast on the P8
4304 // once we support fp <-> gpr moves.
4305
4306 // This can only ever happen in the presence of f32 array types,
4307 // since otherwise we never run out of FPRs before running out
4308 // of GPRs.
4309 unsigned VReg = MF.addLiveIn(GPR[GPR_idx++], &PPC::G8RCRegClass);
4310 FuncInfo->addLiveInAttr(VReg, Flags);
4311 ArgVal = DAG.getCopyFromReg(Chain, dl, VReg, MVT::i64);
4312
4313 if (ObjectVT == MVT::f32) {
4314 if ((ArgOffset % PtrByteSize) == (isLittleEndian ? 4 : 0))
4315 ArgVal = DAG.getNode(ISD::SRL, dl, MVT::i64, ArgVal,
4316 DAG.getConstant(32, dl, MVT::i32));
4317 ArgVal = DAG.getNode(ISD::TRUNCATE, dl, MVT::i32, ArgVal);
4318 }
4319
4320 ArgVal = DAG.getNode(ISD::BITCAST, dl, ObjectVT, ArgVal);
4321 } else {
4322 if (CallConv == CallingConv::Fast)
4323 ComputeArgOffset();
4324
4325 needsLoad = true;
4326 }
4327
4328 // When passing an array of floats, the array occupies consecutive
4329 // space in the argument area; only round up to the next doubleword
4330 // at the end of the array. Otherwise, each float takes 8 bytes.
4331 if (CallConv != CallingConv::Fast || needsLoad) {
4332 ArgSize = Flags.isInConsecutiveRegs() ? ObjSize : PtrByteSize;
4333 ArgOffset += ArgSize;
4334 if (Flags.isInConsecutiveRegsLast())
4335 ArgOffset = ((ArgOffset + PtrByteSize - 1)/PtrByteSize) * PtrByteSize;
4336 }
4337 break;
4338 case MVT::v4f32:
4339 case MVT::v4i32:
4340 case MVT::v8i16:
4341 case MVT::v16i8:
4342 case MVT::v2f64:
4343 case MVT::v2i64:
4344 case MVT::v1i128:
4345 case MVT::f128:
4346 // These can be scalar arguments or elements of a vector array type
4347 // passed directly. The latter are used to implement ELFv2 homogeneous
4348 // vector aggregates.
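// Editorial illustration (assumed AltiVec C source): a homogeneous vector
// aggregate such as
//   struct Hva { vector int a, b; };
// consumes one VR per member (v2 and v3 if it is the first argument).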
4349 if (VR_idx != Num_VR_Regs) {
4350 unsigned VReg = MF.addLiveIn(VR[VR_idx], &PPC::VRRCRegClass);
4351 ArgVal = DAG.getCopyFromReg(Chain, dl, VReg, ObjectVT);
4352 ++VR_idx;
4353 } else {
4354 if (CallConv == CallingConv::Fast)
4355 ComputeArgOffset();
4356 needsLoad = true;
4357 }
4358 if (CallConv != CallingConv::Fast || needsLoad)
4359 ArgOffset += 16;
4360 break;
4361 }
4362
4363 // We need to load the argument to a virtual register if we determined
4364 // above that we ran out of physical registers of the appropriate type.
4365 if (needsLoad) {
4366 if (ObjSize < ArgSize && !isLittleEndian)
4367 CurArgOffset += ArgSize - ObjSize;
4368 int FI = MFI.CreateFixedObject(ObjSize, CurArgOffset, isImmutable);
4369 SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
4370 ArgVal = DAG.getLoad(ObjectVT, dl, Chain, FIN, MachinePointerInfo());
4371 }
4372
4373 InVals.push_back(ArgVal);
4374 }
4375
4376 // Area that is at least reserved in the caller of this function.
4377 unsigned MinReservedArea;
4378 if (HasParameterArea)
4379 MinReservedArea = std::max(ArgOffset, LinkageSize + 8 * PtrByteSize);
4380 else
4381 MinReservedArea = LinkageSize;
4382
4383 // Set the size that is at least reserved in caller of this function. Tail
4384 // call optimized functions' reserved stack space needs to be aligned so that
4385 // taking the difference between two stack areas will result in an aligned
4386 // stack.
4387 MinReservedArea =
4388 EnsureStackAlignment(Subtarget.getFrameLowering(), MinReservedArea);
4389 FuncInfo->setMinReservedArea(MinReservedArea);
4390
4391 // If the function takes variable number of arguments, make a frame index for
4392 // the start of the first vararg value... for expansion of llvm.va_start.
4393 // The ELFv2 ABI spec states:
4394 // C programs that are intended to be *portable* across different compilers
4395 // and architectures must use the header file <stdarg.h> to deal with variable
4396 // argument lists.
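// Editorial illustration (assumed C source) of such a portable consumer:
//   int sum(int n, ...) {
//     va_list ap;
//     va_start(ap, n);   // lowered via llvm.va_start to the frame index
//     int s = 0;         // set up below
//     for (int i = 0; i < n; ++i)
//       s += va_arg(ap, int);
//     va_end(ap);
//     return s;
//   }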
4397 if (isVarArg && MFI.hasVAStart()) {
4398 int Depth = ArgOffset;
4399
4400 FuncInfo->setVarArgsFrameIndex(
4401 MFI.CreateFixedObject(PtrByteSize, Depth, true));
4402 SDValue FIN = DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(), PtrVT);
4403
4404 // If this function is vararg, store any remaining integer argument regs
4405 // to their spots on the stack so that they may be loaded by dereferencing
4406 // the result of va_next.
4407 for (GPR_idx = (ArgOffset - LinkageSize) / PtrByteSize;
4408 GPR_idx < Num_GPR_Regs; ++GPR_idx) {
4409 unsigned VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::G8RCRegClass);
4410 SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, PtrVT);
4411 SDValue Store =
4412 DAG.getStore(Val.getValue(1), dl, Val, FIN, MachinePointerInfo());
4413 MemOps.push_back(Store);
4414 // Increment the address by eight for the next argument to store
4415 SDValue PtrOff = DAG.getConstant(PtrByteSize, dl, PtrVT);
4416 FIN = DAG.getNode(ISD::ADD, dl, PtrOff.getValueType(), FIN, PtrOff);
4417 }
4418 }
4419
4420 if (!MemOps.empty())
4421 Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOps);
4422
4423 return Chain;
4424}
4425
4426/// CalculateTailCallSPDiff - Get the amount the stack pointer has to be
4427/// adjusted to accommodate the arguments for the tailcall.
4428static int CalculateTailCallSPDiff(SelectionDAG& DAG, bool isTailCall,
4429 unsigned ParamSize) {
4430
4431 if (!isTailCall) return 0;
4432
4433 PPCFunctionInfo *FI = DAG.getMachineFunction().getInfo<PPCFunctionInfo>();
4434 unsigned CallerMinReservedArea = FI->getMinReservedArea();
4435 int SPDiff = (int)CallerMinReservedArea - (int)ParamSize;
4436 // Remember only if the new adjustment is bigger.
4437 if (SPDiff < FI->getTailCallSPDelta())
4438 FI->setTailCallSPDelta(SPDiff);
4439
4440 return SPDiff;
4441}
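// Editorial worked example: if the caller reserved 112 bytes
// (CallerMinReservedArea) and the tail callee needs a 96-byte parameter
// area, SPDiff = 112 - 96 = 16 and no adjustment is recorded; a negative
// SPDiff (the callee needs more space than the caller reserved) is what
// gets remembered via setTailCallSPDelta above.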
4442
4443 static bool isFunctionGlobalAddress(SDValue Callee);
4444 
4445static bool callsShareTOCBase(const Function *Caller, SDValue Callee,
4446 const TargetMachine &TM) {
4447 // It does not make sense to call callsShareTOCBase() with a caller that
4448 // is PC Relative since PC Relative callers do not have a TOC.
4449#ifndef NDEBUG
4450 const PPCSubtarget *STICaller = &TM.getSubtarget<PPCSubtarget>(*Caller);
4451 assert(!STICaller->isUsingPCRelativeCalls() &&
4452 "PC Relative callers do not have a TOC and cannot share a TOC Base");
4453#endif
4454
4455 // Callee is either a GlobalAddress or an ExternalSymbol. ExternalSymbols
4456 // don't have enough information to determine if the caller and callee share
4457 // the same TOC base, so we have to pessimistically assume they don't for
4458 // correctness.
4459 const GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee);
4460 if (!G)
4461 return false;
4462
4463 const GlobalValue *GV = G->getGlobal();
4464
4465 // If the callee is preemptable, then the static linker will use a plt-stub
4466 // which saves the toc to the stack, and needs a nop after the call
4467 // instruction to convert to a toc-restore.
4468 if (!TM.shouldAssumeDSOLocal(*Caller->getParent(), GV))
4469 return false;
4470
4471 // Functions with PC Relative enabled may clobber the TOC in the same DSO.
4472 // We may need a TOC restore in the situation where the caller requires a
4473 // valid TOC but the callee is PC Relative and does not.
4474 const Function *F = dyn_cast<Function>(GV);
4475 const GlobalAlias *Alias = dyn_cast<GlobalAlias>(GV);
4476
4477 // If we have an Alias we can try to get the function from there.
4478 if (Alias) {
4479 const GlobalObject *GlobalObj = Alias->getBaseObject();
4480 F = dyn_cast<Function>(GlobalObj);
4481 }
4482
4483 // If we still have no valid function pointer we do not have enough
4484 // information to determine if the callee uses PC Relative calls so we must
4485 // assume that it does.
4486 if (!F)
4487 return false;
4488
4489 // If the callee uses PC Relative we cannot guarantee that the callee won't
4490 // clobber the TOC of the caller and so we must assume that the two
4491 // functions do not share a TOC base.
4492 const PPCSubtarget *STICallee = &TM.getSubtarget<PPCSubtarget>(*F);
4493 if (STICallee->isUsingPCRelativeCalls())
4494 return false;
4495
4496 // If the GV is not a strong definition then we need to assume it can be
4497 // replaced by another function at link time. The function that replaces
4498 // it may not share the same TOC as the caller since the callee may be
4499 // replaced by a PC Relative version of the same function.
4500 if (!GV->isStrongDefinitionForLinker())
4501 return false;
4502
4503 // The medium and large code models are expected to provide a sufficiently
4504 // large TOC to provide all data addressing needs of a module with a
4505 // single TOC.
4506 if (CodeModel::Medium == TM.getCodeModel() ||
4507 CodeModel::Large == TM.getCodeModel())
4508 return true;
4509
4510 // Any explicitly-specified sections and section prefixes must also match.
4511 // Also, if we're using -ffunction-sections, then each function is always in
4512 // a different section (the same is true for COMDAT functions).
4513 if (TM.getFunctionSections() || GV->hasComdat() || Caller->hasComdat() ||
4514 GV->getSection() != Caller->getSection())
4515 return false;
4516 if (const auto *F = dyn_cast<Function>(GV)) {
4517 if (F->getSectionPrefix() != Caller->getSectionPrefix())
4518 return false;
4519 }
4520
4521 return true;
4522}
4523
4524static bool
4525 needStackSlotPassParameters(const PPCSubtarget &Subtarget,
4526 const SmallVectorImpl<ISD::OutputArg> &Outs) {
4527 assert(Subtarget.is64BitELFABI());
4528
4529 const unsigned PtrByteSize = 8;
4530 const unsigned LinkageSize = Subtarget.getFrameLowering()->getLinkageSize();
4531
4532 static const MCPhysReg GPR[] = {
4533 PPC::X3, PPC::X4, PPC::X5, PPC::X6,
4534 PPC::X7, PPC::X8, PPC::X9, PPC::X10,
4535 };
4536 static const MCPhysReg VR[] = {
4537 PPC::V2, PPC::V3, PPC::V4, PPC::V5, PPC::V6, PPC::V7, PPC::V8,
4538 PPC::V9, PPC::V10, PPC::V11, PPC::V12, PPC::V13
4539 };
4540
4541 const unsigned NumGPRs = array_lengthof(GPR);
4542 const unsigned NumFPRs = 13;
4543 const unsigned NumVRs = array_lengthof(VR);
4544 const unsigned ParamAreaSize = NumGPRs * PtrByteSize;
4545
4546 unsigned NumBytes = LinkageSize;
4547 unsigned AvailableFPRs = NumFPRs;
4548 unsigned AvailableVRs = NumVRs;
4549
4550 for (const ISD::OutputArg& Param : Outs) {
4551 if (Param.Flags.isNest()) continue;
4552
4553 if (CalculateStackSlotUsed(Param.VT, Param.ArgVT, Param.Flags, PtrByteSize,
4554 LinkageSize, ParamAreaSize, NumBytes,
4555 AvailableFPRs, AvailableVRs))
4556 return true;
4557 }
4558 return false;
4559}
4560
4561static bool hasSameArgumentList(const Function *CallerFn, const CallBase &CB) {
4562 if (CB.arg_size() != CallerFn->arg_size())
4563 return false;
4564
4565 auto CalleeArgIter = CB.arg_begin();
4566 auto CalleeArgEnd = CB.arg_end();
4567 Function::const_arg_iterator CallerArgIter = CallerFn->arg_begin();
4568
4569 for (; CalleeArgIter != CalleeArgEnd; ++CalleeArgIter, ++CallerArgIter) {
4570 const Value* CalleeArg = *CalleeArgIter;
4571 const Value* CallerArg = &(*CallerArgIter);
4572 if (CalleeArg == CallerArg)
4573 continue;
4574
4575 // e.g. @caller([4 x i64] %a, [4 x i64] %b) {
4576 // tail call @callee([4 x i64] undef, [4 x i64] %b)
4577 // }
4578 // 1st argument of callee is undef and has the same type as caller.
4579 if (CalleeArg->getType() == CallerArg->getType() &&
4580 isa<UndefValue>(CalleeArg))
4581 continue;
4582
4583 return false;
4584 }
4585
4586 return true;
4587}
4588
4589// Returns true if TCO is possible between the callers and callees
4590// calling conventions.
4591static bool
4592 areCallingConvEligibleForTCO_64SVR4(CallingConv::ID CallerCC,
4593 CallingConv::ID CalleeCC) {
4594 // Tail calls are possible with fastcc and ccc.
4595 auto isTailCallableCC = [] (CallingConv::ID CC){
4596 return CC == CallingConv::C || CC == CallingConv::Fast;
4597 };
4598 if (!isTailCallableCC(CallerCC) || !isTailCallableCC(CalleeCC))
4599 return false;
4600
4601 // We can safely tail call both fastcc and ccc callees from a c calling
4602 // convention caller. If the caller is fastcc, we may have less stack space
4603 // than a non-fastcc caller with the same signature so disable tail-calls in
4604 // that case.
4605 return CallerCC == CallingConv::C || CallerCC == CalleeCC;
4606}
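// Editorial summary of the rule above:
//   ccc caller    -> ccc or fastcc callee : eligible
//   fastcc caller -> fastcc callee        : eligible
//   fastcc caller -> ccc callee           : rejected (possibly less stack)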
4607
4608bool PPCTargetLowering::IsEligibleForTailCallOptimization_64SVR4(
4609 SDValue Callee, CallingConv::ID CalleeCC, const CallBase *CB, bool isVarArg,
4610 const SmallVectorImpl<ISD::OutputArg> &Outs,
4611 const SmallVectorImpl<ISD::InputArg> &Ins, SelectionDAG &DAG) const {
4612 bool TailCallOpt = getTargetMachine().Options.GuaranteedTailCallOpt;
4613
4614 if (DisableSCO && !TailCallOpt) return false;
4615
4616 // Variadic argument functions are not supported.
4617 if (isVarArg) return false;
4618
4619 auto &Caller = DAG.getMachineFunction().getFunction();
4620 // Check that the calling conventions are compatible for tco.
4621 if (!areCallingConvEligibleForTCO_64SVR4(Caller.getCallingConv(), CalleeCC))
4622 return false;
4623
4624 // Callers with any byval parameter are not supported.
4625 if (any_of(Ins, [](const ISD::InputArg &IA) { return IA.Flags.isByVal(); }))
4626 return false;
4627
4628 // Callees with any byval parameter are not supported either.
4629 // Note: This is a quick workaround, because in some cases, e.g.
4630 // caller's stack size > callee's stack size, we are still able to apply
4631 // sibling call optimization. For example, gcc is able to do SCO for caller1
4632 // in the following example, but not for caller2.
4633 // struct test {
4634 // long int a;
4635 // char ary[56];
4636 // } gTest;
4637 // __attribute__((noinline)) int callee(struct test v, struct test *b) {
4638 // b->a = v.a;
4639 // return 0;
4640 // }
4641 // void caller1(struct test a, struct test c, struct test *b) {
4642 // callee(gTest, b); }
4643 // void caller2(struct test *b) { callee(gTest, b); }
4644 if (any_of(Outs, [](const ISD::OutputArg& OA) { return OA.Flags.isByVal(); }))
4645 return false;
4646
4647 // If callee and caller use different calling conventions, we cannot pass
4648 // parameters on stack since offsets for the parameter area may be different.
4649 if (Caller.getCallingConv() != CalleeCC &&
4650 needStackSlotPassParameters(Subtarget, Outs))
4651 return false;
4652
4653 // All variants of 64-bit ELF ABIs without PC-Relative addressing require that
4654 // the caller and callee share the same TOC for TCO/SCO. If the caller and
4655 // callee potentially have different TOC bases then we cannot tail call since
4656 // we need to restore the TOC pointer after the call.
4657 // ref: https://bugzilla.mozilla.org/show_bug.cgi?id=973977
4658 // We cannot guarantee this for indirect calls or calls to external functions.
4659 // When PC-Relative addressing is used, the concept of the TOC is no longer
4660 // applicable so this check is not required.
4661 // Check first for indirect calls.
4662 if (!Subtarget.isUsingPCRelativeCalls() &&
4663 !isFunctionGlobalAddress(Callee) && !isa<ExternalSymbolSDNode>(Callee))
4664 return false;
4665
4666 // Check if we share the TOC base.
4667 if (!Subtarget.isUsingPCRelativeCalls() &&
4668 !callsShareTOCBase(&Caller, Callee, getTargetMachine()))
4669 return false;
4670
4671 // TCO allows altering callee ABI, so we don't have to check further.
4672 if (CalleeCC == CallingConv::Fast && TailCallOpt)
4673 return true;
4674
4675 if (DisableSCO) return false;
4676
4677 // If the callee uses the same argument list as the caller, then we can
4678 // apply SCO in this case. If not, then we need to check whether the callee
4679 // needs stack space for passing arguments.
4680 // PC Relative tail calls may not have a CallBase.
4681 // If there is no CallBase we cannot verify if we have the same argument
4682 // list so assume that we don't have the same argument list.
4683 if (CB && !hasSameArgumentList(&Caller, *CB) &&
4684 needStackSlotPassParameters(Subtarget, Outs))
4685 return false;
4686 else if (!CB && needStackSlotPassParameters(Subtarget, Outs))
4687 return false;
4688
4689 return true;
4690}
4691
4692/// IsEligibleForTailCallOptimization - Check whether the call is eligible
4693/// for tail call optimization. Targets which want to do tail call
4694/// optimization should implement this function.
4695bool
4696PPCTargetLowering::IsEligibleForTailCallOptimization(SDValue Callee,
4697 CallingConv::ID CalleeCC,
4698 bool isVarArg,
4699 const SmallVectorImpl<ISD::InputArg> &Ins,
4700 SelectionDAG& DAG) const {
4701 if (!getTargetMachine().Options.GuaranteedTailCallOpt)
4702 return false;
4703
4704 // Variable argument functions are not supported.
4705 if (isVarArg)
4706 return false;
4707
4708 MachineFunction &MF = DAG.getMachineFunction();
4709 CallingConv::ID CallerCC = MF.getFunction().getCallingConv();
4710 if (CalleeCC == CallingConv::Fast && CallerCC == CalleeCC) {
4711 // Functions containing by val parameters are not supported.
4712 for (unsigned i = 0; i != Ins.size(); i++) {
4713 ISD::ArgFlagsTy Flags = Ins[i].Flags;
4714 if (Flags.isByVal()) return false;
4715 }
4716
4717 // Non-PIC/GOT tail calls are supported.
4718 if (getTargetMachine().getRelocationModel() != Reloc::PIC_)
4719 return true;
4720
4721 // At the moment we can only do local tail calls (in same module, hidden
4722 // or protected) if we are generating PIC.
4723 if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee))
4724 return G->getGlobal()->hasHiddenVisibility()
4725 || G->getGlobal()->hasProtectedVisibility();
4726 }
4727
4728 return false;
4729}
4730
4731/// isCallCompatibleAddress - Return the immediate to use if the specified
4732/// 32-bit value is representable in the immediate field of a BxA instruction.
4733 static SDNode *isBLACompatibleAddress(SDValue Op, SelectionDAG &DAG) {
4734 ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op);
4735 if (!C) return nullptr;
4736
4737 int Addr = C->getZExtValue();
4738 if ((Addr & 3) != 0 || // Low 2 bits are implicitly zero.
4739 SignExtend32<26>(Addr) != Addr)
4740 return nullptr; // Top 6 bits have to be sext of immediate.
4741
4742 return DAG
4743 .getConstant(
4744 (int)C->getZExtValue() >> 2, SDLoc(Op),
4745 DAG.getTargetLoweringInfo().getPointerTy(DAG.getDataLayout()))
4746 .getNode();
4747}
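// Editorial worked example: Addr = 0x0400 has its low 2 bits clear and is
// unchanged by SignExtend32<26>, so the returned constant encodes
// 0x0400 >> 2 == 0x100; Addr = 0x0401 (misaligned) yields nullptr instead.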
4748
4749namespace {
4750
4751struct TailCallArgumentInfo {
4752 SDValue Arg;
4753 SDValue FrameIdxOp;
4754 int FrameIdx = 0;
4755
4756 TailCallArgumentInfo() = default;
4757};
4758
4759} // end anonymous namespace
4760
4761/// StoreTailCallArgumentsToStackSlot - Stores arguments to their stack slot.
4762 static void StoreTailCallArgumentsToStackSlot(
4763 SelectionDAG &DAG, SDValue Chain,
4764 const SmallVectorImpl<TailCallArgumentInfo> &TailCallArgs,
4765 SmallVectorImpl<SDValue> &MemOpChains, const SDLoc &dl) {
4766 for (unsigned i = 0, e = TailCallArgs.size(); i != e; ++i) {
4767 SDValue Arg = TailCallArgs[i].Arg;
4768 SDValue FIN = TailCallArgs[i].FrameIdxOp;
4769 int FI = TailCallArgs[i].FrameIdx;
4770 // Store relative to framepointer.
4771 MemOpChains.push_back(DAG.getStore(
4772 Chain, dl, Arg, FIN,
4773 MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FI)));
4774 }
4775}
4776
4777/// EmitTailCallStoreFPAndRetAddr - Move the frame pointer and return address to
4778/// the appropriate stack slot for the tail call optimized function call.
4779 static SDValue EmitTailCallStoreFPAndRetAddr(SelectionDAG &DAG, SDValue Chain,
4780 SDValue OldRetAddr, SDValue OldFP,
4781 int SPDiff, const SDLoc &dl) {
4782 if (SPDiff) {
4783 // Calculate the new stack slot for the return address.
4784 MachineFunction &MF = DAG.getMachineFunction();
4785 const PPCSubtarget &Subtarget = MF.getSubtarget<PPCSubtarget>();
4786 const PPCFrameLowering *FL = Subtarget.getFrameLowering();
4787 bool isPPC64 = Subtarget.isPPC64();
4788 int SlotSize = isPPC64 ? 8 : 4;
4789 int NewRetAddrLoc = SPDiff + FL->getReturnSaveOffset();
4790 int NewRetAddr = MF.getFrameInfo().CreateFixedObject(SlotSize,
4791 NewRetAddrLoc, true);
4792 EVT VT = isPPC64 ? MVT::i64 : MVT::i32;
4793 SDValue NewRetAddrFrIdx = DAG.getFrameIndex(NewRetAddr, VT);
4794 Chain = DAG.getStore(Chain, dl, OldRetAddr, NewRetAddrFrIdx,
4795 MachinePointerInfo::getFixedStack(MF, NewRetAddr));
4796 }
4797 return Chain;
4798}
4799
4800/// CalculateTailCallArgDest - Remember Argument for later processing. Calculate
4801/// the position of the argument.
4802static void
4803 CalculateTailCallArgDest(SelectionDAG &DAG, MachineFunction &MF, bool isPPC64,
4804 SDValue Arg, int SPDiff, unsigned ArgOffset,
4805 SmallVectorImpl<TailCallArgumentInfo>& TailCallArguments) {
4806 int Offset = ArgOffset + SPDiff;
4807 uint32_t OpSize = (Arg.getValueSizeInBits() + 7) / 8;
4808 int FI = MF.getFrameInfo().CreateFixedObject(OpSize, Offset, true);
4809 EVT VT = isPPC64 ? MVT::i64 : MVT::i32;
4810 SDValue FIN = DAG.getFrameIndex(FI, VT);
4811 TailCallArgumentInfo Info;
4812 Info.Arg = Arg;
4813 Info.FrameIdxOp = FIN;
4814 Info.FrameIdx = FI;
4815 TailCallArguments.push_back(Info);
4816}
4817
4818/// EmitTCFPAndRetAddrLoad - Emit load from frame pointer and return address
4819/// stack slot. Returns the chain as result and the loaded frame pointers in
4820/// LROpOut/FPOpout. Used when tail calling.
4821SDValue PPCTargetLowering::EmitTailCallLoadFPAndRetAddr(
4822 SelectionDAG &DAG, int SPDiff, SDValue Chain, SDValue &LROpOut,
4823 SDValue &FPOpOut, const SDLoc &dl) const {
4824 if (SPDiff) {
4825 // Load the LR and FP stack slot for later adjusting.
4826 EVT VT = Subtarget.isPPC64() ? MVT::i64 : MVT::i32;
4827 LROpOut = getReturnAddrFrameIndex(DAG);
4828 LROpOut = DAG.getLoad(VT, dl, Chain, LROpOut, MachinePointerInfo());
4829 Chain = SDValue(LROpOut.getNode(), 1);
4830 }
4831 return Chain;
4832}
4833
4834/// CreateCopyOfByValArgument - Make a copy of an aggregate at address specified
4835/// by "Src" to address "Dst" of size "Size". Alignment information is
4836/// specified by the specific parameter attribute. The copy will be passed as
4837/// a byval function parameter.
4838/// Sometimes what we are copying is the end of a larger object, the part that
4839/// does not fit in registers.
4840 static SDValue CreateCopyOfByValArgument(SDValue Src, SDValue Dst,
4841 SDValue Chain, ISD::ArgFlagsTy Flags,
4842 SelectionDAG &DAG, const SDLoc &dl) {
4843 SDValue SizeNode = DAG.getConstant(Flags.getByValSize(), dl, MVT::i32);
4844 return DAG.getMemcpy(Chain, dl, Dst, Src, SizeNode,
4845 Flags.getNonZeroByValAlign(), false, false, false,
4846 MachinePointerInfo(), MachinePointerInfo());
4847}
4848
4849/// LowerMemOpCallTo - Store the argument to the stack or remember it in case of
4850/// tail calls.
4851 static void LowerMemOpCallTo(
4852 SelectionDAG &DAG, MachineFunction &MF, SDValue Chain, SDValue Arg,
4853 SDValue PtrOff, int SPDiff, unsigned ArgOffset, bool isPPC64,
4854 bool isTailCall, bool isVector, SmallVectorImpl<SDValue> &MemOpChains,
4855 SmallVectorImpl<TailCallArgumentInfo> &TailCallArguments, const SDLoc &dl) {
4856 EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy(DAG.getDataLayout());
4857 if (!isTailCall) {
4858 if (isVector) {
4859 SDValue StackPtr;
4860 if (isPPC64)
4861 StackPtr = DAG.getRegister(PPC::X1, MVT::i64);
4862 else
4863 StackPtr = DAG.getRegister(PPC::R1, MVT::i32);
4864 PtrOff = DAG.getNode(ISD::ADD, dl, PtrVT, StackPtr,
4865 DAG.getConstant(ArgOffset, dl, PtrVT));
4866 }
4867 MemOpChains.push_back(
4868 DAG.getStore(Chain, dl, Arg, PtrOff, MachinePointerInfo()));
4869 // Calculate and remember argument location.
4870 } else CalculateTailCallArgDest(DAG, MF, isPPC64, Arg, SPDiff, ArgOffset,
4871 TailCallArguments);
4872}
4873
4874static void
4875 PrepareTailCall(SelectionDAG &DAG, SDValue &InFlag, SDValue &Chain,
4876 const SDLoc &dl, int SPDiff, unsigned NumBytes, SDValue LROp,
4877 SDValue FPOp,
4878 SmallVectorImpl<TailCallArgumentInfo> &TailCallArguments) {
4879 // Emit a sequence of copyto/copyfrom virtual registers for arguments that
4880 // might overwrite each other in case of tail call optimization.
4881 SmallVector<SDValue, 8> MemOpChains2;
4882 // Do not flag preceding copytoreg stuff together with the following stuff.
4883 InFlag = SDValue();
4884 StoreTailCallArgumentsToStackSlot(DAG, Chain, TailCallArguments,
4885 MemOpChains2, dl);
4886 if (!MemOpChains2.empty())
4887 Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOpChains2);
4888
4889 // Store the return address to the appropriate stack slot.
4890 Chain = EmitTailCallStoreFPAndRetAddr(DAG, Chain, LROp, FPOp, SPDiff, dl);
4891
4892 // Emit callseq_end just before tailcall node.
4893 Chain = DAG.getCALLSEQ_END(Chain, DAG.getIntPtrConstant(NumBytes, dl, true),
4894 DAG.getIntPtrConstant(0, dl, true), InFlag, dl);
4895 InFlag = Chain.getValue(1);
4896}
4897
4898// Is this global address that of a function that can be called by name? (as
4899// opposed to something that must hold a descriptor for an indirect call).
4900 static bool isFunctionGlobalAddress(SDValue Callee) {
4901 if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee)) {
4902 if (Callee.getOpcode() == ISD::GlobalTLSAddress ||
4903 Callee.getOpcode() == ISD::TargetGlobalTLSAddress)
4904 return false;
4905
4906 return G->getGlobal()->getValueType()->isFunctionTy();
4907 }
4908
4909 return false;
4910}
4911
4912SDValue PPCTargetLowering::LowerCallResult(
4913 SDValue Chain, SDValue InFlag, CallingConv::ID CallConv, bool isVarArg,
4914 const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &dl,
4915 SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const {
4916 SmallVector<CCValAssign, 16> RVLocs;
4917 CCState CCRetInfo(CallConv, isVarArg, DAG.getMachineFunction(), RVLocs,
4918 *DAG.getContext());
4919
4920 CCRetInfo.AnalyzeCallResult(
4921 Ins, (Subtarget.isSVR4ABI() && CallConv == CallingConv::Cold)
4922 ? RetCC_PPC_Cold
4923 : RetCC_PPC);
4924
4925 // Copy all of the result registers out of their specified physreg.
4926 for (unsigned i = 0, e = RVLocs.size(); i != e; ++i) {
4927 CCValAssign &VA = RVLocs[i];
4928 assert(VA.isRegLoc() && "Can only return in registers!");
4929
4930 SDValue Val;
4931
4932 if (Subtarget.hasSPE() && VA.getLocVT() == MVT::f64) {
4933 SDValue Lo = DAG.getCopyFromReg(Chain, dl, VA.getLocReg(), MVT::i32,
4934 InFlag);
4935 Chain = Lo.getValue(1);
4936 InFlag = Lo.getValue(2);
4937 VA = RVLocs[++i]; // skip ahead to next loc
4938 SDValue Hi = DAG.getCopyFromReg(Chain, dl, VA.getLocReg(), MVT::i32,
4939 InFlag);
4940 Chain = Hi.getValue(1);
4941 InFlag = Hi.getValue(2);
4942 if (!Subtarget.isLittleEndian())
4943 std::swap (Lo, Hi);
4944 Val = DAG.getNode(PPCISD::BUILD_SPE64, dl, MVT::f64, Lo, Hi);
4945 } else {
4946 Val = DAG.getCopyFromReg(Chain, dl,
4947 VA.getLocReg(), VA.getLocVT(), InFlag);
4948 Chain = Val.getValue(1);
4949 InFlag = Val.getValue(2);
4950 }
4951
4952 switch (VA.getLocInfo()) {
4953 default: llvm_unreachable("Unknown loc info!");
4954 case CCValAssign::Full: break;
4955 case CCValAssign::AExt:
4956 Val = DAG.getNode(ISD::TRUNCATE, dl, VA.getValVT(), Val);
4957 break;
4958 case CCValAssign::ZExt:
4959 Val = DAG.getNode(ISD::AssertZext, dl, VA.getLocVT(), Val,
4960 DAG.getValueType(VA.getValVT()));
4961 Val = DAG.getNode(ISD::TRUNCATE, dl, VA.getValVT(), Val);
4962 break;
4963 case CCValAssign::SExt:
4964 Val = DAG.getNode(ISD::AssertSext, dl, VA.getLocVT(), Val,
4965 DAG.getValueType(VA.getValVT()));
4966 Val = DAG.getNode(ISD::TRUNCATE, dl, VA.getValVT(), Val);
4967 break;
4968 }
4969
4970 InVals.push_back(Val);
4971 }
4972
4973 return Chain;
4974}
4975
4976 static bool isIndirectCall(const SDValue &Callee, SelectionDAG &DAG,
4977 const PPCSubtarget &Subtarget, bool isPatchPoint) {
4978 // PatchPoint calls are not indirect.
4979 if (isPatchPoint)
4980 return false;
4981
4982 if (isFunctionGlobalAddress(Callee) || isa<ExternalSymbolSDNode>(Callee))
4983 return false;
4984
4985 // Darwin and 32-bit ELF can use a BLA. The descriptor-based ABIs cannot
4986 // because the immediate function pointer points to a descriptor instead of
4987 // a function entry point. The ELFv2 ABI cannot use a BLA because the function
4988 // pointer immediate points to the global entry point, while the BLA would
4989 // need to jump to the local entry point (see rL211174).
4990 if (!Subtarget.usesFunctionDescriptors() && !Subtarget.isELFv2ABI() &&
4991 isBLACompatibleAddress(Callee, DAG))
4992 return false;
4993
4994 return true;
4995}
4996
4997// AIX and 64-bit ELF ABIs w/o PCRel require a TOC save/restore around calls.
4998static inline bool isTOCSaveRestoreRequired(const PPCSubtarget &Subtarget) {
4999 return Subtarget.isAIXABI() ||
5000 (Subtarget.is64BitELFABI() && !Subtarget.isUsingPCRelativeCalls());
5001}
5002
5003 static unsigned getCallOpcode(PPCTargetLowering::CallFlags CFlags,
5004 const Function &Caller,
5005 const SDValue &Callee,
5006 const PPCSubtarget &Subtarget,
5007 const TargetMachine &TM) {
5008 if (CFlags.IsTailCall)
5009 return PPCISD::TC_RETURN;
5010
5011 // This is a call through a function pointer.
5012 if (CFlags.IsIndirect) {
5013 // AIX and the 64-bit ELF ABIs need to maintain the TOC pointer across
5014 // indirect calls. The save of the caller's TOC pointer to the stack will be
5015 // inserted into the DAG as part of call lowering. The restore of the TOC
5016 // pointer is modeled by using a pseudo instruction for the call opcode that
5017 // represents the 2 instruction sequence of an indirect branch and link,
5018 // immediately followed by a load of the TOC pointer from the stack save
5019 // slot into gpr2. For 64-bit ELFv2 ABI with PCRel, do not restore the TOC
5020 // as it is not saved or used.
5021 return isTOCSaveRestoreRequired(Subtarget) ? PPCISD::BCTRL_LOAD_TOC
5022 : PPCISD::BCTRL;
5023 }
5024
5025 if (Subtarget.isUsingPCRelativeCalls()) {
5026 assert(Subtarget.is64BitELFABI() && "PC Relative is only on ELF ABI.");
5027 return PPCISD::CALL_NOTOC;
5028 }
5029
5030 // The ABIs that maintain a TOC pointer across calls need to have a nop
5031 // immediately following the call instruction if the caller and callee may
5032 // have different TOC bases. At link time if the linker determines the calls
5033 // may not share a TOC base, the call is redirected to a trampoline inserted
5034 // by the linker. The trampoline will (among other things) save the caller's
5035 // TOC pointer at an ABI designated offset in the linkage area and the linker
5036 // will rewrite the nop to be a load of the TOC pointer from the linkage area
5037 // into gpr2.
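// Editorial illustration (the exact offset is ABI-dependent):
//   bl callee
//   nop          # linker may rewrite this to: ld r2, <TOC save offset>(r1)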
5038 if (Subtarget.isAIXABI() || Subtarget.is64BitELFABI())
5039 return callsShareTOCBase(&Caller, Callee, TM) ? PPCISD::CALL
5040 : PPCISD::CALL_NOP;
5041 
5042 return PPCISD::CALL;
5043}
5044
5045 static SDValue transformCallee(const SDValue &Callee, SelectionDAG &DAG,
5046 const SDLoc &dl, const PPCSubtarget &Subtarget) {
5047 if (!Subtarget.usesFunctionDescriptors() && !Subtarget.isELFv2ABI())
5048 if (SDNode *Dest = isBLACompatibleAddress(Callee, DAG))
5049 return SDValue(Dest, 0);
5050
5051 // Returns true if the callee is local, and false otherwise.
5052 auto isLocalCallee = [&]() {
5053 const GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee);
5054 const Module *Mod = DAG.getMachineFunction().getFunction().getParent();
5055 const GlobalValue *GV = G ? G->getGlobal() : nullptr;
5056
5057 return DAG.getTarget().shouldAssumeDSOLocal(*Mod, GV) &&
5058 !dyn_cast_or_null<GlobalIFunc>(GV);
5059 };
5060
5061 // The PLT is only used in 32-bit ELF PIC mode. Attempting to use the PLT in
5062 // a static relocation model causes some versions of GNU LD (2.17.50, at
5063 // least) to force BSS-PLT, instead of secure-PLT, even if all objects are
5064 // built with secure-PLT.
5065 bool UsePlt =
5066 Subtarget.is32BitELFABI() && !isLocalCallee() &&
5067 Subtarget.getTargetMachine().getRelocationModel() == Reloc::PIC_;
5068 
5069 const auto getAIXFuncEntryPointSymbolSDNode = [&](const GlobalValue *GV) {
5070 const TargetMachine &TM = Subtarget.getTargetMachine();
5071 const TargetLoweringObjectFile *TLOF = TM.getObjFileLowering();
5072 MCSymbolXCOFF *S =
5073 cast<MCSymbolXCOFF>(TLOF->getFunctionEntryPointSymbol(GV, TM));
5074 
5075 MVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy(DAG.getDataLayout());
5076 return DAG.getMCSymbol(S, PtrVT);
5077 };
5078
5079 if (isFunctionGlobalAddress(Callee)) {
5080 const GlobalValue *GV = cast<GlobalAddressSDNode>(Callee)->getGlobal();
5081
5082 if (Subtarget.isAIXABI()) {
5083 assert(!isa<GlobalIFunc>(GV) && "IFunc is not supported on AIX.");
5084 return getAIXFuncEntryPointSymbolSDNode(GV);
5085 }
5086 return DAG.getTargetGlobalAddress(GV, dl, Callee.getValueType(), 0,
5087 UsePlt ? PPCII::MO_PLT : 0);
5088 }
5089
5090 if (ExternalSymbolSDNode *S = dyn_cast<ExternalSymbolSDNode>(Callee)) {
5091 const char *SymName = S->getSymbol();
5092 if (Subtarget.isAIXABI()) {
5093 // If there exists a user-declared function whose name is the same as the
5094 // ExternalSymbol's, then we pick up the user-declared version.
5095 const Module *Mod = DAG.getMachineFunction().getFunction().getParent();
5096 if (const Function *F =
5097 dyn_cast_or_null<Function>(Mod->getNamedValue(SymName)))
5098 return getAIXFuncEntryPointSymbolSDNode(F);
5099
5100 // On AIX, direct function calls reference the symbol for the function's
5101 // entry point, which is named by prepending a "." before the function's
5102 // C-linkage name. A Qualname is returned here because an external
5103 // function entry point is a csect with XTY_ER property.
5104 const auto getExternalFunctionEntryPointSymbol = [&](StringRef SymName) {
5105 auto &Context = DAG.getMachineFunction().getMMI().getContext();
5106 MCSectionXCOFF *Sec = Context.getXCOFFSection(
5107 (Twine(".") + Twine(SymName)).str(), XCOFF::XMC_PR, XCOFF::XTY_ER,
5108 SectionKind::getMetadata());
5109 return Sec->getQualNameSymbol();
5110 };
5111
5112 SymName = getExternalFunctionEntryPointSymbol(SymName)->getName().data();
5113 }
5114 return DAG.getTargetExternalSymbol(SymName, Callee.getValueType(),
5115 UsePlt ? PPCII::MO_PLT : 0);
5116 }
5117
5118 // No transformation needed.
5119 assert(Callee.getNode() && "What no callee?");
5120 return Callee;
5121}
5122
5123 static SDValue getOutputChainFromCallSeq(SDValue CallSeqStart) {
5124 assert(CallSeqStart.getOpcode() == ISD::CALLSEQ_START &&
5125 "Expected a CALLSEQ_STARTSDNode.");
5126
5127 // The last operand is the chain, except when the node has glue. If the node
5128 // has glue, then the last operand is the glue, and the chain is the second
5129 // last operand.
5130 SDValue LastValue = CallSeqStart.getValue(CallSeqStart->getNumValues() - 1);
5131 if (LastValue.getValueType() != MVT::Glue)
5132 return LastValue;
5133
5134 return CallSeqStart.getValue(CallSeqStart->getNumValues() - 2);
5135}
5136
5137 // Creates the node that moves a function's address into the count register
5138// to prepare for an indirect call instruction.
5139 static void prepareIndirectCall(SelectionDAG &DAG, SDValue &Callee,
5140 SDValue &Glue, SDValue &Chain,
5141 const SDLoc &dl) {
5142 SDValue MTCTROps[] = {Chain, Callee, Glue};
5143 EVT ReturnTypes[] = {MVT::Other, MVT::Glue};
5144 Chain = DAG.getNode(PPCISD::MTCTR, dl, makeArrayRef(ReturnTypes, 2),
5145 makeArrayRef(MTCTROps, Glue.getNode() ? 3 : 2));
5146 // The glue is the second value produced.
5147 Glue = Chain.getValue(1);
5148}
5149
5150 static void prepareDescriptorIndirectCall(SelectionDAG &DAG, SDValue &Callee,
5151 SDValue &Glue, SDValue &Chain,
5152 SDValue CallSeqStart,
5153 const CallBase *CB, const SDLoc &dl,
5154 bool hasNest,
5155 const PPCSubtarget &Subtarget) {
5156 // Function pointers in the 64-bit SVR4 ABI do not point to the function
5157 // entry point, but to the function descriptor (the function entry point
5158 // address is part of the function descriptor though).
5159 // The function descriptor is a three doubleword structure with the
5160 // following fields: function entry point, TOC base address and
5161 // environment pointer.
5162 // Thus for a call through a function pointer, the following actions need
5163 // to be performed:
5164 // 1. Save the TOC of the caller in the TOC save area of its stack
5165 // frame (this is done in LowerCall_Darwin() or LowerCall_64SVR4()).
5166 // 2. Load the address of the function entry point from the function
5167 // descriptor.
5168 // 3. Load the TOC of the callee from the function descriptor into r2.
5169 // 4. Load the environment pointer from the function descriptor into
5170 // r11.
5171 // 5. Branch to the function entry point address.
5172 // 6. On return of the callee, the TOC of the caller needs to be
5173 // restored (this is done in FinishCall()).
5174 //
5175 // The loads are scheduled at the beginning of the call sequence, and the
5176 // register copies are flagged together to ensure that no other
5177 // operations can be scheduled in between. E.g. without flagging the
5178 // copies together, a TOC access in the caller could be scheduled between
5179 // the assignment of the callee TOC and the branch to the callee, which leads
5180 // to incorrect code.
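// Editorial sketch of the descriptor layout described above (the field
// offsets actually used come from descriptorTOCAnchorOffset() and
// descriptorEnvironmentPointerOffset() below):
//   struct FunctionDescriptor {
//     void *EntryPoint;  // loaded first below
//     void *TOCBase;     // copied into the TOC register (r2)
//     void *EnvPointer;  // copied into the environment register (r11)
//   };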
5181
5182 // Start by loading the function address from the descriptor.
5183 SDValue LDChain = getOutputChainFromCallSeq(CallSeqStart);
5184 auto MMOFlags = Subtarget.hasInvariantFunctionDescriptors()
5185 ? (MachineMemOperand::MODereferenceable |
5186 MachineMemOperand::MOInvariant)
5187 : MachineMemOperand::MONone;
5188 
5189 MachinePointerInfo MPI(CB ? CB->getCalledOperand() : nullptr);
5190
5191 // Registers used in building the DAG.
5192 const MCRegister EnvPtrReg = Subtarget.getEnvironmentPointerRegister();
5193 const MCRegister TOCReg = Subtarget.getTOCPointerRegister();
5194
5195 // Offsets of descriptor members.
5196 const unsigned TOCAnchorOffset = Subtarget.descriptorTOCAnchorOffset();
5197 const unsigned EnvPtrOffset = Subtarget.descriptorEnvironmentPointerOffset();
5198
5199 const MVT RegVT = Subtarget.isPPC64() ? MVT::i64 : MVT::i32;
5200 const unsigned Alignment = Subtarget.isPPC64() ? 8 : 4;
5201
5202 // One load for the functions entry point address.
5203 SDValue LoadFuncPtr = DAG.getLoad(RegVT, dl, LDChain, Callee, MPI,
5204 Alignment, MMOFlags);
5205
5206 // One for loading the TOC anchor for the module that contains the called
5207 // function.
5208 SDValue TOCOff = DAG.getIntPtrConstant(TOCAnchorOffset, dl);
5209 SDValue AddTOC = DAG.getNode(ISD::ADD, dl, RegVT, Callee, TOCOff);
5210 SDValue TOCPtr =
5211 DAG.getLoad(RegVT, dl, LDChain, AddTOC,
5212 MPI.getWithOffset(TOCAnchorOffset), Alignment, MMOFlags);
5213
5214 // One for loading the environment pointer.
5215 SDValue PtrOff = DAG.getIntPtrConstant(EnvPtrOffset, dl);
5216 SDValue AddPtr = DAG.getNode(ISD::ADD, dl, RegVT, Callee, PtrOff);
5217 SDValue LoadEnvPtr =
5218 DAG.getLoad(RegVT, dl, LDChain, AddPtr,
5219 MPI.getWithOffset(EnvPtrOffset), Alignment, MMOFlags);
5220
5221
5222 // Then copy the newly loaded TOC anchor to the TOC pointer.
5223 SDValue TOCVal = DAG.getCopyToReg(Chain, dl, TOCReg, TOCPtr, Glue);
5224 Chain = TOCVal.getValue(0);
5225 Glue = TOCVal.getValue(1);
5226
5227 // If the function call has an explicit 'nest' parameter, it takes the
5228 // place of the environment pointer.
5229 assert((!hasNest || !Subtarget.isAIXABI()) &&
5230 "Nest parameter is not supported on AIX.");
5231 if (!hasNest) {
5232 SDValue EnvVal = DAG.getCopyToReg(Chain, dl, EnvPtrReg, LoadEnvPtr, Glue);
5233 Chain = EnvVal.getValue(0);
5234 Glue = EnvVal.getValue(1);
5235 }
5236
5237 // The rest of the indirect call sequence is the same as the non-descriptor
5238 // DAG.
5239 prepareIndirectCall(DAG, LoadFuncPtr, Glue, Chain, dl);
5240}
5241
5242static void
5243 buildCallOperands(SmallVectorImpl<SDValue> &Ops,
5244 PPCTargetLowering::CallFlags CFlags, const SDLoc &dl,
5245 SelectionDAG &DAG,
5246 SmallVector<std::pair<unsigned, SDValue>, 8> &RegsToPass,
5247 SDValue Glue, SDValue Chain, SDValue &Callee, int SPDiff,
5248 const PPCSubtarget &Subtarget) {
5249 const bool IsPPC64 = Subtarget.isPPC64();
5250 // MVT for a general purpose register.
5251 const MVT RegVT = IsPPC64 ? MVT::i64 : MVT::i32;
5252
5253 // First operand is always the chain.
5254 Ops.push_back(Chain);
5255
5256 // If it's a direct call pass the callee as the second operand.
5257 if (!CFlags.IsIndirect)
5258 Ops.push_back(Callee);
5259 else {
5260 assert(!CFlags.IsPatchPoint && "Patch point calls are not indirect.");
5261
5262 // For the TOC based ABIs, we have saved the TOC pointer to the linkage area
5263 // on the stack (this would have been done in `LowerCall_64SVR4` or
5264 // `LowerCall_AIX`). The call instruction is a pseudo instruction that
5265 // represents both the indirect branch and a load that restores the TOC
5266 // pointer from the linkage area. The operand for the TOC restore is an add
5267 // of the TOC save offset to the stack pointer. This must be the second
5268 // operand: after the chain input but before any other variadic arguments.
5269 // For 64-bit ELFv2 ABI with PCRel, do not restore the TOC as it is not
5270 // saved or used.
5271 if (isTOCSaveRestoreRequired(Subtarget)) {
5272 const MCRegister StackPtrReg = Subtarget.getStackPointerRegister();
5273
5274 SDValue StackPtr = DAG.getRegister(StackPtrReg, RegVT);
5275 unsigned TOCSaveOffset = Subtarget.getFrameLowering()->getTOCSaveOffset();
5276 SDValue TOCOff = DAG.getIntPtrConstant(TOCSaveOffset, dl);
5277 SDValue AddTOC = DAG.getNode(ISD::ADD, dl, RegVT, StackPtr, TOCOff);
5278 Ops.push_back(AddTOC);
5279 }
5280
5281 // Add the register used for the environment pointer.
5282 if (Subtarget.usesFunctionDescriptors() && !CFlags.HasNest)
5283 Ops.push_back(DAG.getRegister(Subtarget.getEnvironmentPointerRegister(),
5284 RegVT));
5285
5286
5287 // Add CTR register as callee so a bctr can be emitted later.
5288 if (CFlags.IsTailCall)
5289 Ops.push_back(DAG.getRegister(IsPPC64 ? PPC::CTR8 : PPC::CTR, RegVT));
5290 }
5291
5292 // If this is a tail call add stack pointer delta.
5293 if (CFlags.IsTailCall)
5294 Ops.push_back(DAG.getConstant(SPDiff, dl, MVT::i32));
5295
5296 // Add argument registers to the end of the list so that they are known live
5297 // into the call.
5298 for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i)
5299 Ops.push_back(DAG.getRegister(RegsToPass[i].first,
5300 RegsToPass[i].second.getValueType()));
5301
5302 // We cannot add R2/X2 as an operand here for PATCHPOINT, because there is
5303 // no way to mark dependencies as implicit here.
5304 // We will add the R2/X2 dependency in EmitInstrWithCustomInserter.
5305 if ((Subtarget.is64BitELFABI() || Subtarget.isAIXABI()) &&
5306 !CFlags.IsPatchPoint && !Subtarget.isUsingPCRelativeCalls())
5307 Ops.push_back(DAG.getRegister(Subtarget.getTOCPointerRegister(), RegVT));
5308
5309 // Add implicit use of CR bit 6 for 32-bit SVR4 vararg calls
5310 if (CFlags.IsVarArg && Subtarget.is32BitELFABI())
5311 Ops.push_back(DAG.getRegister(PPC::CR1EQ, MVT::i32));
5312
5313 // Add a register mask operand representing the call-preserved registers.
5314 const TargetRegisterInfo *TRI = Subtarget.getRegisterInfo();
5315 const uint32_t *Mask =
5316 TRI->getCallPreservedMask(DAG.getMachineFunction(), CFlags.CallConv);
5317 assert(Mask && "Missing call preserved mask for calling convention");
5318 Ops.push_back(DAG.getRegisterMask(Mask));
5319
5320 // If the glue is valid, it is the last operand.
5321 if (Glue.getNode())
5322 Ops.push_back(Glue);
5323}
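// Editorial illustration of the resulting operand order for an indirect,
// non-tail call on a TOC-based ABI: { Chain, TOC-restore address (SP plus
// TOC save offset), environment-pointer register, argument registers...,
// TOC register, register mask, Glue }, matching the pushes above.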
5324
5325SDValue PPCTargetLowering::FinishCall(
5326 CallFlags CFlags, const SDLoc &dl, SelectionDAG &DAG,
5327 SmallVector<std::pair<unsigned, SDValue>, 8> &RegsToPass, SDValue Glue,
5328 SDValue Chain, SDValue CallSeqStart, SDValue &Callee, int SPDiff,
5329 unsigned NumBytes, const SmallVectorImpl<ISD::InputArg> &Ins,
5330 SmallVectorImpl<SDValue> &InVals, const CallBase *CB) const {
5331
5332 if ((Subtarget.is64BitELFABI() && !Subtarget.isUsingPCRelativeCalls()) ||
5333 Subtarget.isAIXABI())
5334 setUsesTOCBasePtr(DAG);
5335
5336 unsigned CallOpc =
5337 getCallOpcode(CFlags, DAG.getMachineFunction().getFunction(), Callee,
5338 Subtarget, DAG.getTarget());
5339
5340 if (!CFlags.IsIndirect)
5341 Callee = transformCallee(Callee, DAG, dl, Subtarget);
5342 else if (Subtarget.usesFunctionDescriptors())
5343 prepareDescriptorIndirectCall(DAG, Callee, Glue, Chain, CallSeqStart, CB,
5344 dl, CFlags.HasNest, Subtarget);
5345 else
5346 prepareIndirectCall(DAG, Callee, Glue, Chain, dl);
5347
5348 // Build the operand list for the call instruction.
5349 SmallVector<SDValue, 8> Ops;
5350 buildCallOperands(Ops, CFlags, dl, DAG, RegsToPass, Glue, Chain, Callee,
5351 SPDiff, Subtarget);
5352
5353 // Emit tail call.
5354 if (CFlags.IsTailCall) {
5355 // Indirect tail call when using PC Relative calls do not have the same
5356 // constraints.
5357 assert(((Callee.getOpcode() == ISD::Register &&
5358 cast<RegisterSDNode>(Callee)->getReg() == PPC::CTR) ||
5359 Callee.getOpcode() == ISD::TargetExternalSymbol ||
5360 Callee.getOpcode() == ISD::TargetGlobalAddress ||
5361 isa<ConstantSDNode>(Callee) ||
5362 (CFlags.IsIndirect && Subtarget.isUsingPCRelativeCalls())) &&
5363 "Expecting a global address, external symbol, absolute value, "
5364 "register or an indirect tail call when PC Relative calls are "
5365 "used.");
5366 // PC Relative calls also use TC_RETURN as the way to mark tail calls.
5367 assert(CallOpc == PPCISD::TC_RETURN &&
5368 "Unexpected call opcode for a tail call.");
5369 DAG.getMachineFunction().getFrameInfo().setHasTailCall();
5370 return DAG.getNode(CallOpc, dl, MVT::Other, Ops);
5371 }
5372
5373 std::array<EVT, 2> ReturnTypes = {{MVT::Other, MVT::Glue}};
5374 Chain = DAG.getNode(CallOpc, dl, ReturnTypes, Ops);
5375 DAG.addNoMergeSiteInfo(Chain.getNode(), CFlags.NoMerge);
5376 Glue = Chain.getValue(1);
5377
5378 // When performing tail call optimization the callee pops its arguments off
5379 // the stack. Account for this here so these bytes can be pushed back on in
5380 // PPCFrameLowering::eliminateCallFramePseudoInstr.
5381 int BytesCalleePops = (CFlags.CallConv == CallingConv::Fast &&
5382 getTargetMachine().Options.GuaranteedTailCallOpt)
5383 ? NumBytes
5384 : 0;
5385
5386 Chain = DAG.getCALLSEQ_END(Chain, DAG.getIntPtrConstant(NumBytes, dl, true),
5387 DAG.getIntPtrConstant(BytesCalleePops, dl, true),
5388 Glue, dl);
5389 Glue = Chain.getValue(1);
5390
5391 return LowerCallResult(Chain, Glue, CFlags.CallConv, CFlags.IsVarArg, Ins, dl,
5392 DAG, InVals);
5393}
5394
5395SDValue
5396PPCTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
5397 SmallVectorImpl<SDValue> &InVals) const {
5398 SelectionDAG &DAG = CLI.DAG;
5399 SDLoc &dl = CLI.DL;
5400 SmallVectorImpl<ISD::OutputArg> &Outs = CLI.Outs;
5401 SmallVectorImpl<SDValue> &OutVals = CLI.OutVals;
5402 SmallVectorImpl<ISD::InputArg> &Ins = CLI.Ins;
5403 SDValue Chain = CLI.Chain;
5404 SDValue Callee = CLI.Callee;
5405 bool &isTailCall = CLI.IsTailCall;
5406 CallingConv::ID CallConv = CLI.CallConv;
5407 bool isVarArg = CLI.IsVarArg;
5408 bool isPatchPoint = CLI.IsPatchPoint;
5409 const CallBase *CB = CLI.CB;
5410
5411 if (isTailCall) {
5412 if (Subtarget.useLongCalls() && !(CB && CB->isMustTailCall()))
5413 isTailCall = false;
5414 else if (Subtarget.isSVR4ABI() && Subtarget.isPPC64())
5415 isTailCall = IsEligibleForTailCallOptimization_64SVR4(
5416 Callee, CallConv, CB, isVarArg, Outs, Ins, DAG);
5417 else
5418 isTailCall = IsEligibleForTailCallOptimization(Callee, CallConv, isVarArg,
5419 Ins, DAG);
5420 if (isTailCall) {
5421 ++NumTailCalls;
5422 if (!getTargetMachine().Options.GuaranteedTailCallOpt)
5423 ++NumSiblingCalls;
5424
5425 // PC Relative calls no longer guarantee that the callee is a Global
5426 // Address Node. The callee could be an indirect tail call in which
5427 // case the SDValue for the callee could be a load (to load the address
5428 // of a function pointer) or it may be a register copy (to move the
5429 // address of the callee from a function parameter into a virtual
5430 // register). It may also be an ExternalSymbolSDNode (e.g. memcpy).
5431 assert((Subtarget.isUsingPCRelativeCalls() ||
5433 "Callee should be an llvm::Function object.");
5434
5435 LLVM_DEBUG(dbgs() << "TCO caller: " << DAG.getMachineFunction().getName()
5436 << "\nTCO callee: ");
5437 LLVM_DEBUG(Callee.dump());
5438 }
5439 }
5440
5441 if (!isTailCall && CB && CB->isMustTailCall())
5442 report_fatal_error("failed to perform tail call elimination on a call "
5443 "site marked musttail");
5444
5445 // When long calls (i.e. indirect calls) are always used, calls are always
5446 // made via function pointer. If we have a function name, first translate it
5447 // into a pointer.
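// Illustrative note (annotation, not in the original source): with
// -mlongcall, a direct "bl foo" is never emitted; the address of foo is
// materialized here and the call then follows the indirect path below,
// i.e. mtctr/bctrl.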
5448 if (Subtarget.useLongCalls() && isa<GlobalAddressSDNode>(Callee) &&
5449 !isTailCall)
5450 Callee = LowerGlobalAddress(Callee, DAG);
5451
5452 CallFlags CFlags(
5453 CallConv, isTailCall, isVarArg, isPatchPoint,
5454 isIndirectCall(Callee, DAG, Subtarget, isPatchPoint),
5455 // hasNest
5456 Subtarget.is64BitELFABI() &&
5457 any_of(Outs, [](ISD::OutputArg Arg) { return Arg.Flags.isNest(); }),
5458 CLI.NoMerge);
5459
5460 if (Subtarget.isAIXABI())
5461 return LowerCall_AIX(Chain, Callee, CFlags, Outs, OutVals, Ins, dl, DAG,
5462 InVals, CB);
5463
5464 assert(Subtarget.isSVR4ABI());
5465 if (Subtarget.isPPC64())
5466 return LowerCall_64SVR4(Chain, Callee, CFlags, Outs, OutVals, Ins, dl, DAG,
5467 InVals, CB);
5468 return LowerCall_32SVR4(Chain, Callee, CFlags, Outs, OutVals, Ins, dl, DAG,
5469 InVals, CB);
5470}
5471
5472SDValue PPCTargetLowering::LowerCall_32SVR4(
5473 SDValue Chain, SDValue Callee, CallFlags CFlags,
5474 const SmallVectorImpl<ISD::OutputArg> &Outs,
5475 const SmallVectorImpl<SDValue> &OutVals,
5476 const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &dl,
5477 SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals,
5478 const CallBase *CB) const {
5479 // See PPCTargetLowering::LowerFormalArguments_32SVR4() for a description
5480 // of the 32-bit SVR4 ABI stack frame layout.
5481
5482 const CallingConv::ID CallConv = CFlags.CallConv;
5483 const bool IsVarArg = CFlags.IsVarArg;
5484 const bool IsTailCall = CFlags.IsTailCall;
5485
5486 assert((CallConv == CallingConv::C ||
5487 CallConv == CallingConv::Cold ||
5488 CallConv == CallingConv::Fast) && "Unknown calling convention!");
5489
5490 const Align PtrAlign(4);
5491
5492 MachineFunction &MF = DAG.getMachineFunction();
5493
5494 // Mark this function as potentially containing a function that contains a
5495 // tail call. As a consequence, the frame pointer will be used for dynamic
5496 // stack allocation and for restoring the caller's stack pointer in this
5497 // function's epilogue, since a tail-called function might overwrite the
5498 // value in this function's (MF) stack pointer stack slot 0(SP).
5499 if (getTargetMachine().Options.GuaranteedTailCallOpt &&
5500 CallConv == CallingConv::Fast)
5501 MF.getInfo<PPCFunctionInfo>()->setHasFastCall();
5502
5503 // Count how many bytes are to be pushed on the stack, including the linkage
5504 // area, parameter list area and the part of the local variable space which
5505 // contains copies of aggregates which are passed by value.
5506
5507 // Assign locations to all of the outgoing arguments.
5508 SmallVector<CCValAssign, 16> ArgLocs;
5509 PPCCCState CCInfo(CallConv, IsVarArg, MF, ArgLocs, *DAG.getContext());
5510
5511 // Reserve space for the linkage area on the stack.
5512 CCInfo.AllocateStack(Subtarget.getFrameLowering()->getLinkageSize(),
5513 PtrAlign);
5514 if (useSoftFloat())
5515 CCInfo.PreAnalyzeCallOperands(Outs);
5516
5517 if (IsVarArg) {
5518 // Handle fixed and variable vector arguments differently.
5519 // Fixed vector arguments go into registers as long as registers are
5520 // available. Variable vector arguments always go into memory.
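// Illustrative note (annotation, not in the original source): for a
// variadic callee such as
//   void f(vector int fixed, ...);
// the named argument "fixed" is resolved by CC_PPC32_SVR4 (a VR if one
// is free), while a vector passed through the "..." is resolved by
// CC_PPC32_SVR4_VarArg and always lands in memory.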
5521 unsigned NumArgs = Outs.size();
5522
5523 for (unsigned i = 0; i != NumArgs; ++i) {
5524 MVT ArgVT = Outs[i].VT;
5525 ISD::ArgFlagsTy ArgFlags = Outs[i].Flags;
5526 bool Result;
5527
5528 if (Outs[i].IsFixed) {
5529 Result = CC_PPC32_SVR4(i, ArgVT, ArgVT, CCValAssign::Full, ArgFlags,
5530 CCInfo);
5531 } else {
5532 Result = CC_PPC32_SVR4_VarArg(i, ArgVT, ArgVT, CCValAssign::Full,
5533 ArgFlags, CCInfo);
5534 }
5535
5536 if (Result) {
5537#ifndef NDEBUG
5538 errs() << "Call operand #" << i << " has unhandled type "
5539 << EVT(ArgVT).getEVTString() << "\n";
5540#endif
5541 llvm_unreachable(nullptr);
5542 }
5543 }
5544 } else {
5545 // All arguments are treated the same.
5546 CCInfo.AnalyzeCallOperands(Outs, CC_PPC32_SVR4);
5547 }
5548 CCInfo.clearWasPPCF128();
5549
5550 // Assign locations to all of the outgoing aggregate by value arguments.
5551 SmallVector<CCValAssign, 16> ByValArgLocs;
5552 CCState CCByValInfo(CallConv, IsVarArg, MF, ByValArgLocs, *DAG.getContext());
5553
5554 // Reserve stack space for the allocations in CCInfo.
5555 CCByValInfo.AllocateStack(CCInfo.getNextStackOffset(), PtrAlign);
5556
5557 CCByValInfo.AnalyzeCallOperands(Outs, CC_PPC32_SVR4_ByVal);
5558
5559 // Size of the linkage area, parameter list area and the part of the local
5560 // variable space where copies of aggregates which are passed by value are
5561 // stored.
5562 unsigned NumBytes = CCByValInfo.getNextStackOffset();
5563
5564 // Calculate by how many bytes the stack has to be adjusted in case of tail
5565 // call optimization.
5566 int SPDiff = CalculateTailCallSPDiff(DAG, IsTailCall, NumBytes);
5567
5568 // Adjust the stack pointer for the new arguments...
5569 // These operations are automatically eliminated by the prolog/epilog pass
5570 Chain = DAG.getCALLSEQ_START(Chain, NumBytes, 0, dl);
5571 SDValue CallSeqStart = Chain;
5572
5573 // Load the return address and frame pointer so they can be moved somewhere
5574 // else later.
5575 SDValue LROp, FPOp;
5576 Chain = EmitTailCallLoadFPAndRetAddr(DAG, SPDiff, Chain, LROp, FPOp, dl);
5577
5578 // Set up a copy of the stack pointer for use loading and storing any
5579 // arguments that may not fit in the registers available for argument
5580 // passing.
5581 SDValue StackPtr = DAG.getRegister(PPC::R1, MVT::i32);
5582
5583 SmallVector<std::pair<unsigned, SDValue>, 8> RegsToPass;
5584 SmallVector<TailCallArgumentInfo, 8> TailCallArguments;
5585 SmallVector<SDValue, 8> MemOpChains;
5586
5587 bool seenFloatArg = false;
5588 // Walk the register/memloc assignments, inserting copies/loads.
5589 // i - Tracks the index into the list of registers allocated for the call
5590 // RealArgIdx - Tracks the index into the list of actual function arguments
5591 // j - Tracks the index into the list of byval arguments
5592 for (unsigned i = 0, RealArgIdx = 0, j = 0, e = ArgLocs.size();
5593 i != e;
5594 ++i, ++RealArgIdx) {
5595 CCValAssign &VA = ArgLocs[i];
5596 SDValue Arg = OutVals[RealArgIdx];
5597 ISD::ArgFlagsTy Flags = Outs[RealArgIdx].Flags;
5598
5599 if (Flags.isByVal()) {
5600 // Argument is an aggregate which is passed by value, thus we need to
5601 // create a copy of it in the local variable space of the current stack
5602 // frame (which is the stack frame of the caller) and pass the address of
5603 // this copy to the callee.
5604 assert((j < ByValArgLocs.size()) && "Index out of bounds!");
5605 CCValAssign &ByValVA = ByValArgLocs[j++];
5606 assert((VA.getValNo() == ByValVA.getValNo()) && "ValNo mismatch!");
5607
5608 // Memory reserved in the local variable space of the caller's stack frame.
5609 unsigned LocMemOffset = ByValVA.getLocMemOffset();
5610
5611 SDValue PtrOff = DAG.getIntPtrConstant(LocMemOffset, dl);
5612 PtrOff = DAG.getNode(ISD::ADD, dl, getPointerTy(MF.getDataLayout()),
5613 StackPtr, PtrOff);
5614
5615 // Create a copy of the argument in the local area of the current
5616 // stack frame.
5617 SDValue MemcpyCall =
5618 CreateCopyOfByValArgument(Arg, PtrOff,
5619 CallSeqStart.getNode()->getOperand(0),
5620 Flags, DAG, dl);
5621
5622 // This must go outside the CALLSEQ_START..END.
5623 SDValue NewCallSeqStart = DAG.getCALLSEQ_START(MemcpyCall, NumBytes, 0,
5624 SDLoc(MemcpyCall));
5625 DAG.ReplaceAllUsesWith(CallSeqStart.getNode(),
5626 NewCallSeqStart.getNode());
5627 Chain = CallSeqStart = NewCallSeqStart;
5628
5629 // Pass the address of the aggregate copy on the stack either in a
5630 // physical register or in the parameter list area of the current stack
5631 // frame to the callee.
5632 Arg = PtrOff;
5633 }
5634
5635 // When useCRBits() is true, there can be i1 arguments.
5636 // This is because getRegisterType(MVT::i1) => MVT::i1,
5637 // and for other integer types getRegisterType() => MVT::i32.
5638 // Extend i1 and ensure callee will get i32.
5639 if (Arg.getValueType() == MVT::i1)
5640 Arg = DAG.getNode(Flags.isSExt() ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND,
5641 dl, MVT::i32, Arg);
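// Illustrative note (annotation, not in the original source): when a
// caller built with CR-bit tracking passes "bool b", the i1 value is
// widened here, e.g. zero-extended, so the callee observes a plain
// 0/1 i32 in its argument GPR.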
5642
5643 if (VA.isRegLoc()) {
5644 seenFloatArg |= VA.getLocVT().isFloatingPoint();
5645 // Put argument in a physical register.
5646 if (Subtarget.hasSPE() && Arg.getValueType() == MVT::f64) {
5647 bool IsLE = Subtarget.isLittleEndian();
5648 SDValue SVal = DAG.getNode(PPCISD::EXTRACT_SPE, dl, MVT::i32, Arg,
5649 DAG.getIntPtrConstant(IsLE ? 0 : 1, dl));
5650 RegsToPass.push_back(std::make_pair(VA.getLocReg(), SVal.getValue(0)));
5651 SVal = DAG.getNode(PPCISD::EXTRACT_SPE, dl, MVT::i32, Arg,
5652 DAG.getIntPtrConstant(IsLE ? 1 : 0, dl));
5653 RegsToPass.push_back(std::make_pair(ArgLocs[++i].getLocReg(),
5654 SVal.getValue(0)));
5655 } else
5656 RegsToPass.push_back(std::make_pair(VA.getLocReg(), Arg));
5657 } else {
5658 // Put argument in the parameter list area of the current stack frame.
5659 assert(VA.isMemLoc());
5660 unsigned LocMemOffset = VA.getLocMemOffset();
5661
5662 if (!IsTailCall) {
5663 SDValue PtrOff = DAG.getIntPtrConstant(LocMemOffset, dl);
5664 PtrOff = DAG.getNode(ISD::ADD, dl, getPointerTy(MF.getDataLayout()),
5665 StackPtr, PtrOff);
5666
5667 MemOpChains.push_back(
5668 DAG.getStore(Chain, dl, Arg, PtrOff, MachinePointerInfo()));
5669 } else {
5670 // Calculate and remember argument location.
5671 CalculateTailCallArgDest(DAG, MF, false, Arg, SPDiff, LocMemOffset,
5672 TailCallArguments);
5673 }
5674 }
5675 }
5676
5677 if (!MemOpChains.empty())
5678 Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOpChains);
5679
5680 // Build a sequence of copy-to-reg nodes chained together with token chain
5681 // and flag operands which copy the outgoing args into the appropriate regs.
5682 SDValue InFlag;
5683 for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) {
5684 Chain = DAG.getCopyToReg(Chain, dl, RegsToPass[i].first,
5685 RegsToPass[i].second, InFlag);
5686 InFlag = Chain.getValue(1);
5687 }
5688
5689 // Set CR bit 6 to true if this is a vararg call with floating args passed in
5690 // registers.
5691 if (IsVarArg) {
5692 SDVTList VTs = DAG.getVTList(MVT::Other, MVT::Glue);
5693 SDValue Ops[] = { Chain, InFlag };
5694
5695 Chain = DAG.getNode(seenFloatArg ? PPCISD::CR6SET : PPCISD::CR6UNSET,
5696 dl, VTs, makeArrayRef(Ops, InFlag.getNode() ? 2 : 1));
5697
5698 InFlag = Chain.getValue(1);
5699 }
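// Illustrative note (annotation, not in the original source): for
//   printf("%f\n", x);
// the double travels in an FPR, so PPCISD::CR6SET (creqv 6,6,6) is
// emitted, whereas printf("%d\n", 1) has no FP args in registers and
// gets PPCISD::CR6UNSET (crxor 6,6,6). The callee's va_start prologue
// tests CR bit 6 to decide whether the FPR argument registers must be
// saved.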
5700
5701 if (IsTailCall)
5702 PrepareTailCall(DAG, InFlag, Chain, dl, SPDiff, NumBytes, LROp, FPOp,
5703 TailCallArguments);
5704
5705 return FinishCall(CFlags, dl, DAG, RegsToPass, InFlag, Chain, CallSeqStart,
5706 Callee, SPDiff, NumBytes, Ins, InVals, CB);
5707}
5708
5709// Copy an argument into memory, being careful to do this outside the
5710// call sequence for the call to which the argument belongs.
5711SDValue PPCTargetLowering::createMemcpyOutsideCallSeq(
5712 SDValue Arg, SDValue PtrOff, SDValue CallSeqStart, ISD::ArgFlagsTy Flags,
5713 SelectionDAG &DAG, const SDLoc &dl) const {
5714 SDValue MemcpyCall = CreateCopyOfByValArgument(Arg, PtrOff,
5715 CallSeqStart.getNode()->getOperand(0),
5716 Flags, DAG, dl);
5717 // The MEMCPY must go outside the CALLSEQ_START..END.
5718 int64_t FrameSize = CallSeqStart.getConstantOperandVal(1);
5719 SDValue NewCallSeqStart = DAG.getCALLSEQ_START(MemcpyCall, FrameSize, 0,
5720 SDLoc(MemcpyCall));
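// Illustrative note (annotation, not in the original source): if the
// original CALLSEQ_START was chained to some token T, re-issuing it on
// top of MemcpyCall yields T -> memcpy -> CALLSEQ_START, so the by-value
// copy is complete before the call sequence opens.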
5721 DAG.ReplaceAllUsesWith(CallSeqStart.getNode(),
5722 NewCallSeqStart.getNode());
5723 return NewCallSeqStart;
5724}
5725
5726SDValue PPCTargetLowering::LowerCall_64SVR4(
5727 SDValue Chain, SDValue Callee, CallFlags CFlags,
5728 const SmallVectorImpl<ISD::OutputArg> &Outs,
5729 const SmallVectorImpl<SDValue> &OutVals,
5730 const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &dl,
5731 SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals,
5732 const CallBase *CB) const {
5733 bool isELFv2ABI = Subtarget.isELFv2ABI();
5734 bool isLittleEndian = Subtarget.isLittleEndian();
5735 unsigned NumOps = Outs.size();
5736 bool IsSibCall = false;
5737 bool IsFastCall = CFlags.CallConv == CallingConv::Fast;
5738
5739 EVT PtrVT = getPointerTy(DAG.getDataLayout());
5740 unsigned PtrByteSize = 8;
5741
5742 MachineFunction &MF = DAG.getMachineFunction();
5743
5744 if (CFlags.IsTailCall && !getTargetMachine().Options.GuaranteedTailCallOpt)
5745 IsSibCall = true;
5746
5747 // Mark this function as potentially containing a function that contains a
5748 // tail call. As a consequence, the frame pointer will be used for dynamic
5749 // stack allocation and for restoring the caller's stack pointer in this
5750 // function's epilogue, since a tail-called function might overwrite the
5751 // value in this function's (MF) stack pointer stack slot 0(SP).
5752 if (getTargetMachine().Options.GuaranteedTailCallOpt && IsFastCall)
5753 MF.getInfo<PPCFunctionInfo>()->setHasFastCall();
5754
5755 assert(!(IsFastCall && CFlags.IsVarArg) &&
5756 "fastcc not supported on varargs functions");
5757
5758 // Count how many bytes are to be pushed on the stack, including the linkage
5759 // area, and parameter passing area. On ELFv1, the linkage area is 48 bytes
5760 // reserved space for [SP][CR][LR][2 x unused][TOC]; on ELFv2, the linkage
5761 // area is 32 bytes reserved space for [SP][CR][LR][TOC].
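// Illustrative note (annotation, not in the original source): the byte
// offsets from the SP are:
//   ELFv1 (48 bytes): 0 back chain, 8 CR save, 16 LR save, 24 and 32
//   reserved, 40 TOC save.
//   ELFv2 (32 bytes): 0 back chain, 8 CR save, 16 LR save, 24 TOC save.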
5762 unsigned LinkageSize = Subtarget.getFrameLowering()->getLinkageSize();
5763 unsigned NumBytes = LinkageSize;
5764 unsigned GPR_idx = 0, FPR_idx = 0, VR_idx = 0;
5765
5766 static const MCPhysReg GPR[] = {
5767 PPC::X3, PPC::X4, PPC::X5, PPC::X6,
5768 PPC::X7, PPC::X8, PPC::X9, PPC::X10,
5769 };
5770 static const MCPhysReg VR[] = {
5771 PPC::V2, PPC::V3, PPC::V4, PPC::V5, PPC::V6, PPC::V7, PPC::V8,
5772 PPC::V9, PPC::V10, PPC::V11, PPC::V12, PPC::V13
5773 };
5774
5775 const unsigned NumGPRs = array_lengthof(GPR);
5776 const unsigned NumFPRs = useSoftFloat() ? 0 : 13;
5777 const unsigned NumVRs = array_lengthof(VR);
5778
5779 // On ELFv2, we can avoid allocating the parameter area if all the arguments
5780 // can be passed to the callee in registers.
5781 // For the fast calling convention, there is another check below.
5782 // Note: We should keep consistent with LowerFormalArguments_64SVR4()
5783 bool HasParameterArea = !isELFv2ABI || CFlags.IsVarArg || IsFastCall;
5784 if (!HasParameterArea) {
5785 unsigned ParamAreaSize = NumGPRs * PtrByteSize;
5786 unsigned AvailableFPRs = NumFPRs;
5787 unsigned AvailableVRs = NumVRs;
5788 unsigned NumBytesTmp = NumBytes;
5789 for (unsigned i = 0; i != NumOps; ++i) {
5790 if (Outs[i].Flags.isNest()) continue;
5791 if (CalculateStackSlotUsed(Outs[i].VT, Outs[i].ArgVT, Outs[i].Flags,
5792 PtrByteSize, LinkageSize, ParamAreaSize,
5793 NumBytesTmp, AvailableFPRs, AvailableVRs))
5794 HasParameterArea = true;
5795 }
5796 }
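// Illustrative note (annotation, not in the original source): for
//   void f(int a, double b);
// under ELFv2, "a" travels in X3 and "b" in an FPR; CalculateStackSlotUsed
// reports no stack use for either, HasParameterArea stays false, and
// NumBytes remains the bare 32-byte linkage area.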
5797
5798 // When using the fast calling convention, we don't provide backing for
5799 // arguments that will be in registers.
5800 unsigned NumGPRsUsed = 0, NumFPRsUsed = 0, NumVRsUsed = 0;
5801
5802 // Avoid allocating parameter area for fastcc functions if all the arguments
5803 // can be passed in the registers.
5804 if (IsFastCall)
5805 HasParameterArea = false;
5806
5807 // Add up all the space actually used.
5808 for (unsigned i = 0; i != NumOps; ++i) {
5809 ISD::ArgFlagsTy Flags = Outs[i].Flags;
5810 EVT ArgVT = Outs[i].VT;
5811 EVT OrigVT = Outs[i].ArgVT;
5812
5813 if (Flags.isNest())
5814 continue;
5815
5816 if (IsFastCall) {
5817 if (Flags.isByVal()) {
5818 NumGPRsUsed += (Flags.getByValSize()+7)/8;
5819 if (NumGPRsUsed > NumGPRs)
5820 HasParameterArea = true;
5821 } else {
5822 switch (ArgVT.getSimpleVT().SimpleTy) {
5823 default: llvm_unreachable("Unexpected ValueType for argument!");
5824 case MVT::i1:
5825 case MVT::i32:
5826 case MVT::i64:
5827 if (++NumGPRsUsed <= NumGPRs)
5828 continue;
5829 break;
5830 case MVT::v4i32:
5831 case MVT::v8i16:
5832 case MVT::v16i8:
5833 case MVT::v2f64:
5834 case MVT::v2i64:
5835 case MVT::v1i128:
5836 case MVT::f128:
5837 if (++NumVRsUsed <= NumVRs)
5838 continue;
5839 break;
5840 case MVT::v4f32:
5841 if (++NumVRsUsed <= NumVRs)
5842 continue;
5843 break;
5844 case MVT::f32:
5845 case MVT::f64:
5846 if (++NumFPRsUsed <= NumFPRs)
5847 continue;
5848 break;
5849 }
5850 HasParameterArea = true;
5851 }
5852 }
5853
5854 /* Respect alignment of argument on the stack. */
5855 auto Alignment =
5856 CalculateStackSlotAlignment(ArgVT, OrigVT, Flags, PtrByteSize);
5857 NumBytes = alignTo(NumBytes, Alignment);
5858
5859 NumBytes += CalculateStackSlotSize(ArgVT, Flags, PtrByteSize);
5860 if (Flags.isInConsecutiveRegsLast())
5861 NumBytes = ((NumBytes + PtrByteSize - 1)/PtrByteSize) * PtrByteSize;
5862 }
5863
5864 unsigned NumBytesActuallyUsed = NumBytes;
5865
5866 // In the old ELFv1 ABI, the prolog code of the callee may store up to 8
5867 // GPR argument registers to the stack, allowing va_start to index over
5868 // them in memory if it is varargs.
5869 // Because we cannot tell if this is needed on the caller side, we have to
5870 // conservatively assume that it is needed. As such, make sure we have at
5871 // least enough stack space for the caller to store the 8 GPRs.
5872 // In the ELFv2 ABI, we allocate the parameter area iff a callee
5873 // really requires memory operands, e.g. a vararg function.
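// Illustrative note (annotation, not in the original source): on 64-bit
// ELFv1 this raises NumBytes to at least 48 + 8 * 8 = 112 bytes whenever
// a parameter area is required.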
5874 if (HasParameterArea)
5875 NumBytes = std::max(NumBytes, LinkageSize + 8 * PtrByteSize);
5876 else
5877 NumBytes = LinkageSize;
5878
5879 // Tail call needs the stack to be aligned.
5880 if (getTargetMachine().Options.GuaranteedTailCallOpt && IsFastCall)
5881 NumBytes = EnsureStackAlignment(Subtarget.getFrameLowering(), NumBytes);
5882
5883 int SPDiff = 0;
5884
5885 // Calculate by how many bytes the stack has to be adjusted in case of tail
5886 // call optimization.
5887 if (!IsSibCall)
5888 SPDiff = CalculateTailCallSPDiff(DAG, CFlags.IsTailCall, NumBytes);
5889
5890 // To protect arguments on the stack from being clobbered in a tail call,
5891 // force all the loads to happen before doing any other lowering.
5892 if (CFlags.IsTailCall)
5893 Chain = DAG.getStackArgumentTokenFactor(Chain);
5894
5895 // Adjust the stack pointer for the new arguments...
5896 // These operations are automatically eliminated by the prolog/epilog pass
5897 if (!IsSibCall)
5898 Chain = DAG.getCALLSEQ_START(Chain, NumBytes, 0, dl);
5899 SDValue CallSeqStart = Chain;
5900
5901 // Load the return address and frame pointer so they can be moved somewhere
5902 // else later.
5903 SDValue LROp, FPOp;
5904 Chain = EmitTailCallLoadFPAndRetAddr(DAG, SPDiff, Chain, LROp, FPOp, dl);
5905
5906 // Set up a copy of the stack pointer for use loading and storing any
5907 // arguments that may not fit in the registers available for argument
5908 // passing.
5909 SDValue StackPtr = DAG.getRegister(PPC::X1, MVT::i64);
5910
5911 // Figure out which arguments are going to go in registers, and which in
5912 // memory. Also, if this is a vararg function, floating point operations
5913 // must be stored to our stack, and loaded into integer regs as well, if
5914 // any integer regs are available for argument passing.
5915 unsigned ArgOffset = LinkageSize;
5916
5918 SmallVector<TailCallArgumentInfo, 8> TailCallArguments;
5919
5920 SmallVector<SDValue, 8> MemOpChains;
5921 for (unsigned i = 0; i != NumOps; ++i) {
5922 SDValue Arg = OutVals[i];
5923 ISD::ArgFlagsTy Flags = Outs[i].Flags;
5924 EVT ArgVT = Outs[i].VT;
5925 EVT OrigVT = Outs[i].ArgVT;
5926
5927 // PtrOff will be used to store the current argument to the stack if a
5928 // register cannot be found for it.
5929 SDValue PtrOff;
5930
5931 // We re-align the argument offset for each argument, except when using the
5932 // fast calling convention, when we need to make sure we do that only when
5933 // we'll actually use a stack slot.
5934 auto ComputePtrOff = [&]() {
5935 /* Respect alignment of argument on the stack. */
5936 auto Alignment =
5937 CalculateStackSlotAlignment(ArgVT, OrigVT, Flags, PtrByteSize);
5938 ArgOffset = alignTo(ArgOffset, Alignment);
5939
5940 PtrOff = DAG.getConstant(ArgOffset, dl, StackPtr.getValueType());
5941
5942 PtrOff = DAG.getNode(ISD::ADD, dl, PtrVT, StackPtr, PtrOff);
5943 };
5944
5945 if (!IsFastCall) {
5946 ComputePtrOff();
5947
5948 /* Compute GPR index associated with argument offset. */
5949 GPR_idx = (ArgOffset - LinkageSize) / PtrByteSize;
5950 GPR_idx = std::min(GPR_idx, NumGPRs);
5951 }
5952
5953 // Promote integers to 64-bit values.
5954 if (Arg.getValueType() == MVT::i32 || Arg.getValueType() == MVT::i1) {
5955 // FIXME: Should this use ANY_EXTEND if neither sext nor zext?
5956 unsigned ExtOp = Flags.isSExt() ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND;
5957 Arg = DAG.getNode(ExtOp, dl, MVT::i64, Arg);
5958 }
5959
5960 // FIXME memcpy is used way more than necessary. Correctness first.
5961 // Note: "by value" is code for passing a structure by value, not
5962 // basic types.
5963 if (Flags.isByVal()) {
5964 // Note: Size includes alignment padding, so
5965 // struct x { short a; char b; }
5966 // will have Size = 4. With #pragma pack(1), it will have Size = 3.
5967 // These are the proper values we need for right-justifying the
5968 // aggregate in a parameter register.
5969 unsigned Size = Flags.getByValSize();
5970
5971 // An empty aggregate parameter takes up no storage and no
5972 // registers.
5973 if (Size == 0)
5974 continue;
5975
5976 if (IsFastCall)
5977 ComputePtrOff();
5978
5979 // All aggregates smaller than 8 bytes must be passed right-justified.
5980 if (Size==1 || Size==2 || Size==4) {
5981 EVT VT = (Size==1) ? MVT::i8 : ((Size==2) ? MVT::i16 : MVT::i32);
5982 if (GPR_idx != NumGPRs) {
5983 SDValue Load = DAG.getExtLoad(ISD::EXTLOAD, dl, PtrVT, Chain, Arg,
5984 MachinePointerInfo(), VT);
5985 MemOpChains.push_back(Load.getValue(1));
5986 RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Load));
5987
5988 ArgOffset += PtrByteSize;
5989 continue;
5990 }
5991 }
5992
5993 if (GPR_idx == NumGPRs && Size < 8) {
5994 SDValue AddPtr = PtrOff;
5995 if (!isLittleEndian) {
5996 SDValue Const = DAG.getConstant(PtrByteSize - Size, dl,
5997 PtrOff.getValueType());
5998 AddPtr = DAG.getNode(ISD::ADD, dl, PtrVT, PtrOff, Const);
5999 }
6000 Chain = CallSeqStart = createMemcpyOutsideCallSeq(Arg, AddPtr,
6001 CallSeqStart,
6002 Flags, DAG, dl);
6003 ArgOffset += PtrByteSize;
6004 continue;
6005 }
6006 // Copy entire object into memory. There are cases where gcc-generated
6007 // code assumes it is there, even if it could be put entirely into
6008 // registers. (This is not what the doc says.)
6009
6010 // FIXME: The above statement is likely due to a misunderstanding of the
6011 // documents. All arguments must be copied into the parameter area BY
6012 // THE CALLEE in the event that the callee takes the address of any
6013 // formal argument. That has not yet been implemented. However, it is
6014 // reasonable to use the stack area as a staging area for the register
6015 // load.
6016
6017 // Skip this for small aggregates, as we will use the same slot for a
6018 // right-justified copy, below.
6019 if (Size >= 8)
6020 Chain = CallSeqStart = createMemcpyOutsideCallSeq(Arg, PtrOff,
6021 CallSeqStart,
6022 Flags, DAG, dl);
6023
6024 // When a register is available, pass a small aggregate right-justified.
6025 if (Size < 8 && GPR_idx != NumGPRs) {
6026 // The easiest way to get this right-justified in a register
6027 // is to copy the structure into the rightmost portion of a
6028 // local variable slot, then load the whole slot into the
6029 // register.
6030 // FIXME: The memcpy seems to produce pretty awful code for
6031 // small aggregates, particularly for packed ones.
6032 // FIXME: It would be preferable to use the slot in the
6033 // parameter save area instead of a new local variable.
6034 SDValue AddPtr = PtrOff;
6035 if (!isLittleEndian) {
6036 SDValue Const = DAG.getConstant(8 - Size, dl, PtrOff.getValueType());
6037 AddPtr = DAG.getNode(ISD::ADD, dl, PtrVT, PtrOff, Const);
6038 }
6039 Chain = CallSeqStart = createMemcpyOutsideCallSeq(Arg, AddPtr,
6040 CallSeqStart,
6041 Flags, DAG, dl);
6042
6043 // Load the slot into the register.
6044 SDValue Load =
6045 DAG.getLoad(PtrVT, dl, Chain, PtrOff, MachinePointerInfo());
6046 MemOpChains.push_back(Load.getValue(1));
6047 RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Load));
6048
6049 // Done with this argument.
6050 ArgOffset += PtrByteSize;
6051 continue;
6052 }
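// Illustrative note (annotation, not in the original source): for a
// 3-byte aggregate on a big-endian target the bytes are copied to
// PtrOff + (8 - 3), so the doubleword load above leaves the aggregate
// right-justified in the low 3 bytes of the GPR, as the ABI requires.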
6053
6054 // For aggregates larger than PtrByteSize, copy the pieces of the
6055 // object that fit into registers from the parameter save area.
6056 for (unsigned j=0; j<Size; j+=PtrByteSize) {
6057 SDValue Const = DAG.getConstant(j, dl, PtrOff.getValueType());
6058 SDValue AddArg = DAG.getNode(ISD::ADD, dl, PtrVT, Arg, Const);
6059 if (GPR_idx != NumGPRs) {
6060 SDValue Load =
6061 DAG.getLoad(PtrVT, dl, Chain, AddArg, MachinePointerInfo());
6062 MemOpChains.push_back(Load.getValue(1));
6063 RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Load));
6064 ArgOffset += PtrByteSize;
6065 } else {
6066 ArgOffset += ((Size - j + PtrByteSize-1)/PtrByteSize)*PtrByteSize;
6067 break;
6068 }
6069 }
6070 continue;
6071 }
6072
6073 switch (Arg.getSimpleValueType().SimpleTy) {
6074 default: llvm_unreachable("Unexpected ValueType for argument!");
6075 case MVT::i1:
6076 case MVT::i32:
6077 case MVT::i64:
6078 if (Flags.isNest()) {
6079 // The 'nest' parameter, if any, is passed in R11.
6080 RegsToPass.push_back(std::make_pair(PPC::X11, Arg));
6081 break;
6082 }
6083
6084 // These can be scalar arguments or elements of an integer array type
6085 // passed directly. Clang may use those instead of "byval" aggregate
6086 // types to avoid forcing arguments to memory unnecessarily.
6087 if (GPR_idx != NumGPRs) {
6088 RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Arg));
6089 } else {
6090 if (IsFastCall)
6091 ComputePtrOff();
6092
6093 assert(HasParameterArea &&
6094 "Parameter area must exist to pass an argument in memory.");
6095 LowerMemOpCallTo(DAG, MF, Chain, Arg, PtrOff, SPDiff, ArgOffset,
6096 true, CFlags.IsTailCall, false, MemOpChains,
6097 TailCallArguments, dl);
6098 if (IsFastCall)
6099 ArgOffset += PtrByteSize;
6100 }
6101 if (!IsFastCall)
6102 ArgOffset += PtrByteSize;
6103 break;
6104 case MVT::f32:
6105 case MVT::f64: {
6106 // These can be scalar arguments or elements of a float array type
6107 // passed directly. The latter are used to implement ELFv2 homogeneous
6108 // float aggregates.
6109
6110 // Named arguments go into FPRs first, and once they overflow, the
6111 // remaining arguments go into GPRs and then the parameter save area.
6112 // Unnamed arguments for vararg functions always go to GPRs and
6113 // then the parameter save area. For now, put all arguments to vararg
6114 // routines always in both locations (FPR *and* GPR or stack slot).
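// Illustrative note (annotation, not in the original source): for a
// vararg call passing a double, e.g. printf("%f\n", x), the f64 is
// placed in the next FPR below and, in addition, bitcast to i64 and
// duplicated into a GPR or its stack slot, so the callee may fetch it
// from either location.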
6115 bool NeedGPROrStack = CFlags.IsVarArg || FPR_idx == NumFPRs;
6116 bool NeededLoad = false;
6117
6118 // First load the argument into the next available FPR.
6119 if (FPR_idx != NumFPRs)
6120 RegsToPass.push_back(std::make_pair(FPR[FPR_idx++], Arg));
6121
6122 // Next, load the argument into GPR or stack slot if needed.
6123 if (!NeedGPROrStack)
6124 ;
6125 else if (GPR_idx != NumGPRs && !IsFastCall) {
6126 // FIXME: We may want to re-enable this for CallingConv::Fast on the P8
6127 // once we support fp <-> gpr moves.
6128
6129 // In the non-vararg case, this can only ever happen in the
6130 // presence of f32 array types, since otherwise we never run
6131 // out of FPRs before running out of GPRs.
6132 SDValue ArgVal;
6133
6134 // Double values are always passed in a single GPR.
6135 if (Arg.getValueType() != MVT::f32) {
6136 ArgVal = DAG.getNode(ISD::BITCAST, dl, MVT::i64, Arg);
6137
6138 // Non-array float values are extended and passed in a GPR.
6139 } else if (!Flags.isInConsecutiveRegs()) {
6140 ArgVal = DAG.getNode(ISD::BITCAST, dl, MVT::i32, Arg);
6141 ArgVal = DAG.getNode(ISD::ANY_EXTEND, dl, MVT::i64, ArgVal);
6142
6143 // If we have an array of floats, we collect every odd element
6144 // together with its predecessor into one GPR.
6145 } else if (ArgOffset % PtrByteSize != 0) {
6146 SDValue Lo, Hi;
6147 Lo = DAG.getNode(ISD::BITCAST, dl, MVT::i32, OutVals[i - 1]);
6148 Hi = DAG.getNode(ISD::BITCAST, dl, MVT::i32, Arg);
6149 if (!isLittleEndian)
6150 std::swap(Lo, Hi);
6151 ArgVal = DAG.getNode(ISD::BUILD_PAIR, dl, MVT::i64, Lo, Hi);
6152
6153 // The final element, if even, goes into the first half of a GPR.
6154 } else if (Flags.isInConsecutiveRegsLast()) {
6155 ArgVal = DAG.getNode(ISD::BITCAST, dl, MVT::i32, Arg);
6156 ArgVal = DAG.getNode(ISD::ANY_EXTEND, dl, MVT::i64, ArgVal);
6157 if (!isLittleEndian)
6158 ArgVal = DAG.getNode(ISD::SHL, dl, MVT::i64, ArgVal,
6159 DAG.getConstant(32, dl, MVT::i32));
6160
6161 // Non-final even elements are skipped; they will be handled
6162 // together with the subsequent argument on the next go-around.
6163 } else
6164 ArgVal = SDValue();
6165
6166 if (ArgVal.getNode())
6167 RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], ArgVal));
6168 } else {
6169 if (IsFastCall)
6170 ComputePtrOff();
6171
6172 // Single-precision floating-point values are mapped to the
6173 // second (rightmost) word of the stack doubleword.
6174 if (Arg.getValueType() == MVT::f32 &&
6175 !isLittleEndian && !Flags.isInConsecutiveRegs()) {
6176 SDValue ConstFour = DAG.getConstant(4, dl, PtrOff.getValueType());
6177 PtrOff = DAG.getNode(ISD::ADD, dl, PtrVT, PtrOff, ConstFour);
6178 }
6179
6180 assert(HasParameterArea &&
6181 "Parameter area must exist to pass an argument in memory.");
6182 LowerMemOpCallTo(DAG, MF, Chain, Arg, PtrOff, SPDiff, ArgOffset,
6183 true, CFlags.IsTailCall, false, MemOpChains,
6184 TailCallArguments, dl);
6185
6186 NeededLoad = true;
6187 }
6188 // When passing an array of floats, the array occupies consecutive
6189 // space in the argument area; only round up to the next doubleword
6190 // at the end of the array. Otherwise, each float takes 8 bytes.
6191 if (!IsFastCall || NeededLoad) {
6192 ArgOffset += (Arg.getValueType() == MVT::f32 &&
6193 Flags.isInConsecutiveRegs()) ? 4 : 8;
6194 if (Flags.isInConsecutiveRegsLast())
6195 ArgOffset = ((ArgOffset + PtrByteSize - 1)/PtrByteSize) * PtrByteSize;
6196 }
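// Illustrative note (annotation, not in the original source): for the
// ELFv2 homogeneous aggregate struct { float f[3]; }, each element
// advances ArgOffset by 4, and the last element rounds the 12 bytes
// consumed up to the next doubleword, 16.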
6197 break;
6198 }
6199 case MVT::v4f32:
6200 case MVT::v4i32:
6201 case MVT::v8i16:
6202 case MVT::v16i8:
6203 case MVT::v2f64:
6204 case MVT::v2i64:
6205 case MVT::v1i128:
6206 case MVT::f128:
6207 // These can be scalar arguments or elements of a vector array type
6208 // passed directly. The latter are used to implement ELFv2 homogeneous
6209 // vector aggregates.
6210
6211 // For a varargs call, named arguments go into VRs or on the stack as
6212 // usual; unnamed arguments always go to the stack or the corresponding
6213 // GPRs when within range. For now, we always put the value in both
6214 // locations (or even all three).
6215 if (CFlags.IsVarArg) {
6216 assert(HasParameterArea &&
6217 "Parameter area must exist if we have a varargs call.");
6218 // We could elide this store in the case where the object fits
6219 // entirely in R registers. Maybe later.
6220 SDValue Store =
6221 DAG.getStore(Chain, dl, Arg, PtrOff, MachinePointerInfo());
6222 MemOpChains.push_back(Store);
6223 if (VR_idx != NumVRs) {
6224 SDValue Load =
6225 DAG.getLoad(MVT::v4f32, dl, Store, PtrOff, MachinePointerInfo());
6226 MemOpChains.push_back(Load.getValue(1));
6227 RegsToPass.push_back(std::make_pair(VR[VR_idx++], Load));
6228 }
6229 ArgOffset += 16;
6230 for (unsigned i=0; i<16; i+=PtrByteSize) {
6231 if (GPR_idx == NumGPRs)
6232 break;
6233 SDValue Ix = DAG.getNode(ISD::ADD, dl, PtrVT, PtrOff,
6234 DAG.getConstant(i, dl, PtrVT));
6235 SDValue Load =
6236 DAG.getLoad(PtrVT, dl, Store, Ix, MachinePointerInfo());
6237 MemOpChains.push_back(Load.getValue(1));
6238 RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Load));
6239 }
6240 break;
6241 }
6242
6243 // Non-varargs Altivec params go into VRs or on the stack.
6244 if (VR_idx != NumVRs) {
6245 RegsToPass.push_back(std::make_pair(VR[VR_idx++], Arg));
6246 } else {
6247 if (IsFastCall)
6248 ComputePtrOff();
6249
6250 assert(HasParameterArea &&
6251 "Parameter area must exist to pass an argument in memory.");
6252 LowerMemOpCallTo(DAG, MF, Chain, Arg, PtrOff, SPDiff, ArgOffset,
6253 true, CFlags.IsTailCall, true, MemOpChains,
6254 TailCallArguments, dl);
6255 if (IsFastCall)
6256 ArgOffset += 16;
6257 }
6258
6259 if (!IsFastCall)
6260 ArgOffset += 16;
6261 break;
6262 }
6263 }
6264
6265 assert((!HasParameterArea || NumBytesActuallyUsed == ArgOffset) &&
6266 "mismatch in size of parameter area");
6267 (void)NumBytesActuallyUsed;
6268
6269 if (!MemOpChains.empty())
6270 Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOpChains);
6271
6272 // Check if this is an indirect call (MTCTR/BCTRL).
6273 // See prepareDescriptorIndirectCall and buildCallOperands for more
6274 // information about calls through function pointers in the 64-bit SVR4 ABI.
6275 if (CFlags.IsIndirect) {
6276 // For 64-bit ELFv2 ABI with PCRel, do not save the TOC of the
6277 // caller in the TOC save area.
6278 if (isTOCSaveRestoreRequired(Subtarget)) {
6279 assert(!CFlags.IsTailCall && "Indirect tail calls not supported");
6280 // Load r2 into a virtual register and store it to the TOC save area.
6281 setUsesTOCBasePtr(DAG);
6282 SDValue Val = DAG.getCopyFromReg(Chain, dl, PPC::X2, MVT::i64);
6283 // TOC save area offset.
6284 unsigned TOCSaveOffset = Subtarget.getFrameLowering()->getTOCSaveOffset();
6285 SDValue PtrOff = DAG.getIntPtrConstant(TOCSaveOffset, dl);
6286 SDValue AddPtr = DAG.getNode(ISD::ADD, dl, PtrVT, StackPtr, PtrOff);
6287 Chain = DAG.getStore(Val.getValue(1), dl, Val, AddPtr,
6288 MachinePointerInfo::getStack(
6289 DAG.getMachineFunction(), TOCSaveOffset));
6290 }
6291 // In the ELFv2 ABI, R12 must contain the address of an indirect callee.
6292 // This does not mean the MTCTR instruction must use R12; it's easier
6293 // to model this as an extra parameter, so do that.
6294 if (isELFv2ABI && !CFlags.IsPatchPoint)
6295 RegsToPass.push_back(std::make_pair((unsigned)PPC::X12, Callee));
6296 }
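// Illustrative note (annotation, not in the original source): for an
// indirect call (*fp)() under ELFv2, fp is moved into CTR for the BCTRL
// and is also copied into X12 above, because the callee's global entry
// point rederives its TOC pointer from R12 (addis r2, r12, ...).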
6297
6298 // Build a sequence of copy-to-reg nodes chained together with token chain
6299 // and flag operands which copy the outgoing args into the appropriate regs.
6300 SDValue InFlag;
6301 for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) {
6302 Chain = DAG.getCopyToReg(Chain, dl, RegsToPass[i].first,
6303 RegsToPass[i].second, InFlag);
6304 InFlag = Chain.getValue(1);
6305 }
6306
6307 if (CFlags.IsTailCall && !IsSibCall)
6308 PrepareTailCall(DAG, InFlag, Chain, dl, SPDiff, NumBytes, LROp, FPOp,
6309 TailCallArguments);
6310
6311 return FinishCall(CFlags, dl, DAG, RegsToPass, InFlag, Chain, CallSeqStart,
6312 Callee, SPDiff, NumBytes, Ins, InVals, CB);
6313}
6314
6315static bool CC_AIX(unsigned ValNo, MVT ValVT, MVT LocVT,
6316 CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags,
6317 CCState &State) {
6318
6319 const PPCSubtarget &Subtarget = static_cast<const PPCSubtarget &>(
6320 State.getMachineFunction().getSubtarget());
6321 const bool IsPPC64 = Subtarget.isPPC64();
6322 const Align PtrAlign = IsPPC64 ? Align(8) : Align(4);
6323 const MVT RegVT = IsPPC64 ? MVT::i64 : MVT::i32;
6324
6325 if (ValVT.isVector() && !State.getMachineFunction()
6326 .getTarget()
6327 .Options.EnableAIXExtendedAltivecABI)
6328 report_fatal_error("the default Altivec AIX ABI is not yet supported");
6329
6330 if (ValVT == MVT::f128)
6331 report_fatal_error("f128 is unimplemented on AIX.");
6332
6333 if (ArgFlags.isNest())
6334 report_fatal_error("Nest arguments are unimplemented.");
6335
6336 static const MCPhysReg GPR_32[] = {// 32-bit registers.
6337 PPC::R3, PPC::R4, PPC::R5, PPC::R6,
6338 PPC::R7, PPC::R8, PPC::R9, PPC::R10};
6339 static const MCPhysReg GPR_64[] = {// 64-bit registers.
6340 PPC::X3, PPC::X4, PPC::X5, PPC::X6,
6341 PPC::X7, PPC::X8, PPC::X9, PPC::X10};
6342
6343 static const MCPhysReg VR[] = {// Vector registers.
6344 PPC::V2, PPC::V3, PPC::V4, PPC::V5,
6345 PPC::V6, PPC::V7, PPC::V8, PPC::V9,
6346 PPC::V10, PPC::V11, PPC::V12, PPC::V13};
6347
6348 if (ArgFlags.isByVal()) {
6349 if (ArgFlags.getNonZeroByValAlign() > PtrAlign)
6350 report_fatal_error("Pass-by-value arguments with alignment greater than "
6351 "register width are not supported.");
6352
6353 const unsigned ByValSize = ArgFlags.getByValSize();
6354
6355 // An empty aggregate parameter takes up no storage and no registers,
6356 // but needs a MemLoc for a stack slot for the formal arguments side.
6357 if (ByValSize == 0) {
6358 State.addLoc(CCValAssign::getMem(ValNo, MVT::INVALID_SIMPLE_VALUE_TYPE,
6359 State.getNextStackOffset(), RegVT,
6360 LocInfo));
6361 return false;
6362 }
6363
6364 const unsigned StackSize = alignTo(ByValSize, PtrAlign);
6365 unsigned Offset = State.AllocateStack(StackSize, PtrAlign);
6366 for (const unsigned E = Offset + StackSize; Offset < E;
6367 Offset += PtrAlign.value()) {
6368 if (unsigned Reg = State.AllocateReg(IsPPC64 ? GPR_64 : GPR_32))
6369 State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, RegVT, LocInfo));
6370 else {
6371 State.addLoc(CCValAssign::getMem(ValNo, MVT::INVALID_SIMPLE_VALUE_TYPE,
6372 Offset, MVT::INVALID_SIMPLE_VALUE_TYPE,
6373 LocInfo));
6374 break;
6375 }
6376 }
6377 return false;
6378 }
6379
6380 // Arguments always reserve parameter save area.
6381 switch (ValVT.SimpleTy) {
6382 default:
6383 report_fatal_error("Unhandled value type for argument.");
6384 case MVT::i64:
6385 // i64 arguments should have been split to i32 for PPC32.
6386 assert(IsPPC64 && "PPC32 should have split i64 values.");
6387 LLVM_FALLTHROUGH;
6388 case MVT::i1:
6389 case MVT::i32: {
6390 const unsigned Offset = State.AllocateStack(PtrAlign.value(), PtrAlign);
6391 // AIX integer arguments are always passed in register width.
6392 if (ValVT.getFixedSizeInBits() < RegVT.getFixedSizeInBits())
6393 LocInfo = ArgFlags.isSExt() ? CCValAssign::LocInfo::SExt
6394 : CCValAssign::LocInfo::ZExt;
6395 if (unsigned Reg = State.AllocateReg(IsPPC64 ? GPR_64 : GPR_32))
6396 State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, RegVT, LocInfo));
6397 else
6398 State.addLoc(CCValAssign::getMem(ValNo, ValVT, Offset, RegVT, LocInfo));
6399
6400 return false;
6401 }
6402 case MVT::f32:
6403 case MVT::f64: {
6404 // Parameter save area (PSA) is reserved even if the float passes in fpr.
6405 const unsigned StoreSize = LocVT.getStoreSize();
6406 // Floats are always 4-byte aligned in the PSA on AIX.
6407 // This includes f64 in 64-bit mode for ABI compatibility.
6408 const unsigned Offset =
6409 State.AllocateStack(IsPPC64 ? 8 : StoreSize, Align(4));
6410 unsigned FReg = State.AllocateReg(FPR);
6411 if (FReg)
6412 State.addLoc(CCValAssign::getReg(ValNo, ValVT, FReg, LocVT, LocInfo));
6413
6414 // Reserve and initialize GPRs or initialize the PSA as required.
6415 for (unsigned I = 0; I < StoreSize; I += PtrAlign.value()) {
6416 if (unsigned Reg = State.AllocateReg(IsPPC64 ? GPR_64 : GPR_32)) {
6417 assert(FReg && "An FPR should be available when a GPR is reserved.");
6418 if (State.isVarArg()) {
6419 // Successfully reserved GPRs are only initialized for vararg calls.
6420 // Custom handling is required for:
6421 // f64 in PPC32 needs to be split into 2 GPRs.
6422 // f32 in PPC64 needs to occupy only lower 32 bits of 64-bit GPR.
6423 State.addLoc(
6424 CCValAssign::getCustomReg(ValNo, ValVT, Reg, RegVT, LocInfo));
6425 }
6426 } else {
6427 // If there are insufficient GPRs, the PSA needs to be initialized.
6428 // Initialization occurs even if an FPR was initialized for
6429 // compatibility with the AIX XL compiler. The full memory for the
6430 // argument will be initialized even if a prior word is saved in GPR.
6431 // A custom memLoc is used when the argument also passes in FPR so
6432 // that the callee handling can skip over it easily.
6433 State.addLoc(
6434 FReg ? CCValAssign::getCustomMem(ValNo, ValVT, Offset, LocVT,
6435 LocInfo)
6436 : CCValAssign::getMem(ValNo, ValVT, Offset, LocVT, LocInfo));
6437 break;
6438 }
6439 }
6440
6441 return false;
6442 }
6443 case MVT::v4f32:
6444 case MVT::v4i32:
6445 case MVT::v8i16:
6446 case MVT::v16i8:
6447 case MVT::v2i64:
6448 case MVT::v2f64:
6449 case MVT::v1i128: {
6450 if (State.isVarArg())
6452 "variadic arguments for vector types are unimplemented for AIX");
6453
6454 if (unsigned VReg = State.AllocateReg(VR))
6455 State.addLoc(CCValAssign::getReg(ValNo, ValVT, VReg, LocVT, LocInfo));
6456 else {
6458 "passing vector parameters to the stack is unimplemented for AIX");
6459 }
6460 return false;
6461 }
6462 }
6463 return true;
6464}
6465
6466 static const TargetRegisterClass *getRegClassForSVT(MVT::SimpleValueType SVT,
6467 bool IsPPC64) {
6468 assert((IsPPC64 || SVT != MVT::i64) &&
6469 "i64 should have been split for 32-bit codegen.");
6470
6471 switch (SVT) {
6472 default:
6473 report_fatal_error("Unexpected value type for formal argument");
6474 case MVT::i1:
6475 case MVT::i32:
6476 case MVT::i64:
6477 return IsPPC64 ? &PPC::G8RCRegClass : &PPC::GPRCRegClass;
6478 case MVT::f32:
6479 return &PPC::F4RCRegClass;
6480 case MVT::f64:
6481 return &PPC::F8RCRegClass;
6482 case MVT::v4f32:
6483 case MVT::v4i32:
6484 case MVT::v8i16:
6485 case MVT::v16i8:
6486 case MVT::v2i64:
6487 case MVT::v2f64:
6488 case MVT::v1i128:
6489 return &PPC::VRRCRegClass;
6490 }
6491}
6492
6493 static SDValue truncateScalarIntegerArg(ISD::ArgFlagsTy Flags, EVT ValVT,
6494 SelectionDAG &DAG, SDValue ArgValue,
6495 MVT LocVT, const SDLoc &dl) {
6496 assert(ValVT.isScalarInteger() && LocVT.isScalarInteger());
6497 assert(ValVT.getFixedSizeInBits() < LocVT.getFixedSizeInBits());
6498
6499 if (Flags.isSExt())
6500 ArgValue = DAG.getNode(ISD::AssertSext, dl, LocVT, ArgValue,
6501 DAG.getValueType(ValVT));
6502 else if (Flags.isZExt())
6503 ArgValue = DAG.getNode(ISD::AssertZext, dl, LocVT, ArgValue,
6504 DAG.getValueType(ValVT));
6505
6506 return DAG.getNode(ISD::TRUNCATE, dl, ValVT, ArgValue);
6507}
6508
6509static unsigned mapArgRegToOffsetAIX(unsigned Reg, const PPCFrameLowering *FL) {
6510 const unsigned LASize = FL->getLinkageSize();
6511
6512 if (PPC::GPRCRegClass.contains(Reg)) {
6513 assert(Reg >= PPC::R3 && Reg <= PPC::R10 &&
6514 "Reg must be a valid argument register!");
6515 return LASize + 4 * (Reg - PPC::R3);
6516 }
6517
6518 if (PPC::G8RCRegClass.contains(Reg)) {
6519 assert(Reg >= PPC::X3 && Reg <= PPC::X10 &&
6520 "Reg must be a valid argument register!");
6521 return LASize + 8 * (Reg - PPC::X3);
6522 }
6523
6524 llvm_unreachable("Only general purpose registers expected.");
6525}
6526
6527// AIX ABI Stack Frame Layout:
6528//
6529// Low Memory +--------------------------------------------+
6530// SP +---> | Back chain | ---+
6531// | +--------------------------------------------+ |
6532// | | Saved Condition Register | |
6533// | +--------------------------------------------+ |
6534// | | Saved Linkage Register | |
6535// | +--------------------------------------------+ | Linkage Area
6536// | | Reserved for compilers | |
6537// | +--------------------------------------------+ |
6538// | | Reserved for binders | |
6539// | +--------------------------------------------+ |
6540// | | Saved TOC pointer | ---+
6541// | +--------------------------------------------+
6542// | | Parameter save area |
6543// | +--------------------------------------------+
6544// | | Alloca space |
6545// | +--------------------------------------------+
6546// | | Local variable space |
6547// | +--------------------------------------------+
6548// | | Float/int conversion temporary |
6549// | +--------------------------------------------+
6550// | | Save area for AltiVec registers |
6551// | +--------------------------------------------+
6552// | | AltiVec alignment padding |
6553// | +--------------------------------------------+
6554// | | Save area for VRSAVE register |
6555// | +--------------------------------------------+
6556// | | Save area for General Purpose registers |
6557// | +--------------------------------------------+
6558// | | Save area for Floating Point registers |
6559// | +--------------------------------------------+
6560// +---- | Back chain |
6561// High Memory +--------------------------------------------+
6562//
6563// Specifications:
6564// AIX 7.2 Assembler Language Reference
6565// Subroutine linkage convention
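// Illustrative note (annotation, not in the original source): the linkage
// area offsets from the SP are 0 (back chain), 4 (CR), 8 (LR), 12 and 16
// (reserved), 20 (TOC) in 32-bit mode, 24 bytes in total; every slot
// doubles in 64-bit mode, for 48 bytes in total.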
6566
6567SDValue PPCTargetLowering::LowerFormalArguments_AIX(
6568 SDValue Chain, CallingConv::ID CallConv, bool isVarArg,
6569 const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &dl,
6570 SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const {
6571
6572 assert((CallConv == CallingConv::C || CallConv == CallingConv::Cold ||
6573 CallConv == CallingConv::Fast) &&
6574 "Unexpected calling convention!");
6575
6576 if (getTargetMachine().Options.GuaranteedTailCallOpt)
6577 report_fatal_error("Tail call support is unimplemented on AIX.");
6578
6579 if (useSoftFloat())
6580 report_fatal_error("Soft float support is unimplemented on AIX.");
6581
6582 const PPCSubtarget &Subtarget =
6583 static_cast<const PPCSubtarget &>(DAG.getSubtarget());
6584
6585 const bool IsPPC64 = Subtarget.isPPC64();
6586 const unsigned PtrByteSize = IsPPC64 ? 8 : 4;
6587
6588 // Assign locations to all of the incoming arguments.
6589 SmallVector<CCValAssign, 16> ArgLocs;
6590 MachineFunction &MF = DAG.getMachineFunction();
6591 MachineFrameInfo &MFI = MF.getFrameInfo();
6592 PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>();
6593 CCState CCInfo(CallConv, isVarArg, MF, ArgLocs, *DAG.getContext());
6594
6595 const EVT PtrVT = getPointerTy(MF.getDataLayout());
6596 // Reserve space for the linkage area on the stack.
6597 const unsigned LinkageSize = Subtarget.getFrameLowering()->getLinkageSize();
6598 CCInfo.AllocateStack(LinkageSize, Align(PtrByteSize));
6599 CCInfo.AnalyzeFormalArguments(Ins, CC_AIX);
6600
6601 SmallVector<SDValue, 8> MemOps;
6602
6603 for (size_t I = 0, End = ArgLocs.size(); I != End; /* No increment here */) {
6604 CCValAssign &VA = ArgLocs[I++];
6605 MVT LocVT = VA.getLocVT();
6606 ISD::ArgFlagsTy Flags = Ins[VA.getValNo()].Flags;
6607 if (VA.isMemLoc() && VA.getValVT().isVector())
6609 "passing vector parameters to the stack is unimplemented for AIX");
6610
6611 // For compatibility with the AIX XL compiler, the float args in the
6612 // parameter save area are initialized even if the argument is available
6613 // in register. The caller is required to initialize both the register
6614 // and memory, however, the callee can choose to expect it in either.
6615 // The memloc is dismissed here because the argument is retrieved from
6616 // the register.
6617 if (VA.isMemLoc() && VA.needsCustom())
6618 continue;
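// Illustrative note (annotation, not in the original source): an f64
// formal whose parameter save area words lie beyond the GPR argument
// registers still receives an FPR RegLoc from CC_AIX plus a custom
// MemLoc for its save area slot; the value is taken from the FPR below
// and the shadowing MemLoc is dismissed by the check above.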
6619
6620 if (VA.isRegLoc()) {
6621 if (VA.getValVT().isScalarInteger())
6622 FuncInfo->appendParameterType(PPCFunctionInfo::FixedType);
6623 else if (VA.getValVT().isFloatingPoint() && !VA.getValVT().isVector())
6624 FuncInfo->appendParameterType(VA.getValVT().SimpleTy == MVT::f32
6625 ? PPCFunctionInfo::ShortFloatPoint
6626 : PPCFunctionInfo::LongFloatPoint);
6627 }
6628
6629 if (Flags.isByVal() && VA.isMemLoc()) {
6630 const unsigned Size =
6631 alignTo(Flags.getByValSize() ? Flags.getByValSize() : PtrByteSize,
6632 PtrByteSize);
6633 const int FI = MF.getFrameInfo().CreateFixedObject(
6634 Size, VA.getLocMemOffset(), /* IsImmutable */ false,
6635 /* IsAliased */ true);
6636 SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
6637 InVals.push_back(FIN);
6638
6639 continue;
6640 }
6641
6642 if (Flags.isByVal()) {
6643 assert(VA.isRegLoc() && "MemLocs should already be handled.");
6644
6645 const MCPhysReg ArgReg = VA.getLocReg();
6646 const PPCFrameLowering *FL = Subtarget.getFrameLowering();
6647
6648 if (Flags.getNonZeroByValAlign() > PtrByteSize)
6649 report_fatal_error("Over aligned byvals not supported yet.");
6650
6651 const unsigned StackSize = alignTo(Flags.getByValSize(), PtrByteSize);
6652 const int FI = MF.getFrameInfo().CreateFixedObject(
6653 StackSize, mapArgRegToOffsetAIX(ArgReg, FL), /* IsImmutable */ false,
6654 /* IsAliased */ true);
6655 SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
6656 InVals.push_back(FIN);
6657
6658 // Add live ins for all the RegLocs for the same ByVal.
6659 const TargetRegisterClass *RegClass =
6660 IsPPC64 ? &PPC::G8RCRegClass : &PPC::GPRCRegClass;
6661
6662 auto HandleRegLoc = [&, RegClass, LocVT](const MCPhysReg PhysReg,
6663 unsigned Offset) {
6664 const unsigned VReg = MF.addLiveIn(PhysReg, RegClass);
6665 // Since the caller's side has left-justified the aggregate in the
6666 // register, we can simply store the entire register into the stack
6667 // slot.
6668 SDValue CopyFrom = DAG.getCopyFromReg(Chain, dl, VReg, LocVT);
6669 // The store to the fixedstack object is needed because accessing a
6670 // field of the ByVal will use a gep and load. Ideally we will optimize
6671 // to extracting the value from the register directly, and elide the
6672 // stores when the argument's address is not taken, but that will need
6673 // to be future work.
6674 SDValue Store = DAG.getStore(
6675 CopyFrom.getValue(1), dl, CopyFrom,
6676 DAG.getObjectPtrOffset(dl, FIN, TypeSize::Fixed(Offset)),
6677 MachinePointerInfo::getFixedStack(MF, FI, Offset));
6678
6679 MemOps.push_back(Store);
6680 };
6681
6682 unsigned Offset = 0;
6683 HandleRegLoc(VA.getLocReg(), Offset);
6684 Offset += PtrByteSize;
6685 for (; Offset != StackSize && ArgLocs[I].isRegLoc();
6686 Offset += PtrByteSize) {
6687 assert(ArgLocs[I].getValNo() == VA.getValNo() &&
6688 "RegLocs should be for ByVal argument.");
6689
6690 const CCValAssign RL = ArgLocs[I++];
6691 HandleRegLoc(RL.getLocReg(), Offset);
6692 FuncInfo->appendParameterType(PPCFunctionInfo::FixedType);
6693 }
6694
6695 if (Offset != StackSize) {
6696 assert(ArgLocs[I].getValNo() == VA.getValNo() &&
6697 "Expected MemLoc for remaining bytes.");
6698 assert(ArgLocs[I].isMemLoc() && "Expected MemLoc for remaining bytes.");
6699 // Consume the MemLoc. The InVal has already been emitted, so nothing
6700 // more needs to be done.
6701 ++I;
6702 }
6703
6704 continue;
6705 }
6706
6707 EVT ValVT = VA.getValVT();
6708 if (VA.isRegLoc() && !VA.needsCustom()) {
6709 MVT::SimpleValueType SVT = ValVT.getSimpleVT().SimpleTy;
6710 unsigned VReg =
6711 MF.addLiveIn(VA.getLocReg(), getRegClassForSVT(SVT, IsPPC64));
6712 SDValue ArgValue = DAG.getCopyFromReg(Chain, dl, VReg, LocVT);
6713 if (ValVT.isScalarInteger() &&
6714 (ValVT.getFixedSizeInBits() < LocVT.getFixedSizeInBits())) {
6715 ArgValue =
6716 truncateScalarIntegerArg(Flags, ValVT, DAG, ArgValue, LocVT, dl);
6717 }
6718 InVals.push_back(ArgValue);
6719 continue;
6720 }
6721 if (VA.isMemLoc()) {
6722 const unsigned LocSize = LocVT.getStoreSize();
6723 const unsigned ValSize = ValVT.getStoreSize();
6724 assert((ValSize <= LocSize) &&
6725 "Object size is larger than size of MemLoc");
6726 int CurArgOffset = VA.getLocMemOffset();
6727 // Objects are right-justified because AIX is big-endian.
6728 if (LocSize > ValSize)
6729 CurArgOffset += LocSize - ValSize;
6730 // Potential tail calls could cause overwriting of argument stack slots.
6731 const bool IsImmutable =
6732 !(getTargetMachine().Options.GuaranteedTailCallOpt &&
6733 (CallConv == CallingConv::Fast));
6734 int FI = MFI.CreateFixedObject(ValSize, CurArgOffset, IsImmutable);
6735 SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
6736 SDValue ArgValue =
6737 DAG.getLoad(ValVT, dl, Chain, FIN, MachinePointerInfo());
6738 InVals.push_back(ArgValue);
6739 continue;
6740 }
6741 }
6742
6743 // On AIX a minimum of 8 words is saved to the parameter save area.
6744 const unsigned MinParameterSaveArea = 8 * PtrByteSize;
6745 // Area that is at least reserved in the caller of this function.
6746 unsigned CallerReservedArea =
6747 std::max(CCInfo.getNextStackOffset(), LinkageSize + MinParameterSaveArea);
6748
6749 // Set the size that is at least reserved in caller of this function. Tail
6750 // call optimized function's reserved stack space needs to be aligned so
6751 // that taking the difference between two stack areas will result in an
6752 // aligned stack.
6753 CallerReservedArea =
6754 EnsureStackAlignment(Subtarget.getFrameLowering(), CallerReservedArea);
6755 FuncInfo->setMinReservedArea(CallerReservedArea);
6756
6757 if (isVarArg) {
6758 FuncInfo->setVarArgsFrameIndex(
6759 MFI.CreateFixedObject(PtrByteSize, CCInfo.getNextStackOffset(), true));
6760 SDValue FIN = DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(), PtrVT);
6761
6762 static const MCPhysReg GPR_32[] = {PPC::R3, PPC::R4, PPC::R5, PPC::R6,
6763 PPC::R7, PPC::R8, PPC::R9, PPC::R10};
6764
6765 static const MCPhysReg GPR_64[] = {PPC::X3, PPC::X4, PPC::X5, PPC::X6,
6766 PPC::X7, PPC::X8, PPC::X9, PPC::X10};
6767 const unsigned NumGPArgRegs = array_lengthof(IsPPC64 ? GPR_64 : GPR_32);
6768
6769 // The fixed integer arguments of a variadic function are stored to the
6770 // VarArgsFrameIndex on the stack so that they may be loaded by
6771 // dereferencing the result of va_next.
6772 for (unsigned GPRIndex =
6773 (CCInfo.getNextStackOffset() - LinkageSize) / PtrByteSize;
6774 GPRIndex < NumGPArgRegs; ++GPRIndex) {
6775
6776 const unsigned VReg =
6777 IsPPC64 ? MF.addLiveIn(GPR_64[GPRIndex], &PPC::G8RCRegClass)
6778 : MF.addLiveIn(GPR_32[GPRIndex], &PPC::GPRCRegClass);
6779
6780 SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, PtrVT);
6781 SDValue Store =
6782 DAG.getStore(Val.getValue(1), dl, Val, FIN, MachinePointerInfo());
6783 MemOps.push_back(Store);
6784 // Increment the address for the next argument to store.
6785 SDValue PtrOff = DAG.getConstant(PtrByteSize, dl, PtrVT);
6786 FIN = DAG.getNode(ISD::ADD, dl, PtrOff.getValueType(), FIN, PtrOff);
6787 }
6788 }
6789
6790 if (!MemOps.empty())
6791 Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOps);
6792
6793 return Chain;
6794}
6795
6796SDValue PPCTargetLowering::LowerCall_AIX(
6797 SDValue Chain, SDValue Callee, CallFlags CFlags,
6798 const SmallVectorImpl<ISD::OutputArg> &Outs,
6799 const SmallVectorImpl<SDValue> &OutVals,
6800 const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &dl,
6801 SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals,
6802 const CallBase *CB) const {
6803 // See PPCTargetLowering::LowerFormalArguments_AIX() for a description of the
6804 // AIX ABI stack frame layout.
6805
6806 assert((CFlags.CallConv == CallingConv::C ||
6807 CFlags.CallConv == CallingConv::Cold ||
6808 CFlags.CallConv == CallingConv::Fast) &&
6809 "Unexpected calling convention!");
6810
6811 if (CFlags.IsPatchPoint)
6812 report_fatal_error("This call type is unimplemented on AIX.");
6813
6814 const PPCSubtarget& Subtarget =
6815 static_cast<const PPCSubtarget&>(DAG.getSubtarget());
6816
6817 MachineFunction &MF = DAG.getMachineFunction();
6818 SmallVector<CCValAssign, 16> ArgLocs;
6819 CCState CCInfo(CFlags.CallConv, CFlags.IsVarArg, MF, ArgLocs,
6820 *DAG.getContext());
6821
6822 // Reserve space for the linkage save area (LSA) on the stack.
6823 // In both PPC32 and PPC64 there are 6 reserved slots in the LSA:
6824 // [SP][CR][LR][2 x reserved][TOC].
6825 // The LSA is 24 bytes (6x4) in PPC32 and 48 bytes (6x8) in PPC64.
6826 const unsigned LinkageSize = Subtarget.getFrameLowering()->getLinkageSize();
6827 const bool IsPPC64 = Subtarget.isPPC64();
6828 const EVT PtrVT = getPointerTy(DAG.getDataLayout());
6829 const unsigned PtrByteSize = IsPPC64 ? 8 : 4;
6830 CCInfo.AllocateStack(LinkageSize, Align(PtrByteSize));
6831 CCInfo.AnalyzeCallOperands(Outs, CC_AIX);
6832
6833 // The prolog code of the callee may store up to 8 GPR argument registers to
6834 // the stack, allowing va_start to index over them in memory if the callee
6835 // is variadic.
6836 // Because we cannot tell if this is needed on the caller side, we have to
6837 // conservatively assume that it is needed. As such, make sure we have at
6838 // least enough stack space for the caller to store the 8 GPRs.
6839 const unsigned MinParameterSaveAreaSize = 8 * PtrByteSize;
6840 const unsigned NumBytes = std::max(LinkageSize + MinParameterSaveAreaSize,
6841 CCInfo.getNextStackOffset());
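// Illustrative note (annotation, not in the original source): on 64-bit
// AIX this is max(48 + 64, getNextStackOffset()), i.e. at least 112
// bytes; on 32-bit AIX it is at least 24 + 32 = 56 bytes.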
6842
6843 // Adjust the stack pointer for the new arguments...
6844 // These operations are automatically eliminated by the prolog/epilog pass.
6845 Chain = DAG.getCALLSEQ_START(Chain, NumBytes, 0, dl);
6846 SDValue CallSeqStart = Chain;
6847
6849 SmallVector<SDValue, 8> MemOpChains;
6850
6851 // Set up a copy of the stack pointer for loading and storing any
6852 // arguments that may not fit in the registers available for argument
6853 // passing.
6854 const SDValue StackPtr = IsPPC64 ? DAG.getRegister(PPC::X1, MVT::i64)
6855 : DAG.getRegister(PPC::R1, MVT::i32);
6856
6857 for (unsigned I = 0, E = ArgLocs.size(); I != E;) {
6858 const unsigned ValNo = ArgLocs[I].getValNo();
6859 SDValue Arg = OutVals[ValNo];
6860 ISD::ArgFlagsTy Flags = Outs[ValNo].Flags;
6861
6862 if (Flags.isByVal()) {
6863 const unsigned ByValSize = Flags.getByValSize();
6864
6865 // Nothing to do for zero-sized ByVals on the caller side.
6866 if (!ByValSize) {
6867 ++I;
6868 continue;
6869 }
6870
6871 auto GetLoad = [&](EVT VT, unsigned LoadOffset) {
6872 return DAG.getExtLoad(
6873 ISD::ZEXTLOAD, dl, PtrVT, Chain,
6874 (LoadOffset != 0)
6875 ? DAG.getObjectPtrOffset(dl, Arg, TypeSize::Fixed(LoadOffset))
6876 : Arg,
6877 MachinePointerInfo(), VT);
6878 };
6879
6880 unsigned LoadOffset = 0;
6881
6882 // Initialize registers, which are fully occupied by the by-val argument.
6883 while (LoadOffset + PtrByteSize <= ByValSize && ArgLocs[I].isRegLoc()) {
6884 SDValue Load = GetLoad(PtrVT, LoadOffset);
6885 MemOpChains.push_back(Load.getValue(1));
6886 LoadOffset += PtrByteSize;
6887 const CCValAssign &ByValVA = ArgLocs[I++];
6888 assert(ByValVA.getValNo() == ValNo &&
6889 "Unexpected location for pass-by-value argument.");
6890 RegsToPass.push_back(std::make_pair(ByValVA.getLocReg(), Load));
6891 }
6892
6893 if (LoadOffset == ByValSize)
6894 continue;
6895
6896 // There must be one more loc to handle the remainder.
6897 assert(ArgLocs[I].getValNo() == ValNo &&
6898 "Expected additional location for by-value argument.");
6899
6900 if (ArgLocs[I].isMemLoc()) {
6901 assert(LoadOffset < ByValSize && "Unexpected memloc for by-val arg.");
6902 const CCValAssign &ByValVA = ArgLocs[I++];
6903 ISD::ArgFlagsTy MemcpyFlags = Flags;
6904      // Only memcpy the bytes that don't pass in registers.
6905 MemcpyFlags.setByValSize(ByValSize - LoadOffset);
6906 Chain = CallSeqStart = createMemcpyOutsideCallSeq(
6907 (LoadOffset != 0)
6908 ? DAG.getObjectPtrOffset(dl, Arg, TypeSize::Fixed(LoadOffset))
6909 : Arg,
6910 DAG.getObjectPtrOffset(dl, StackPtr,
6911 TypeSize::Fixed(ByValVA.getLocMemOffset())),
6912 CallSeqStart, MemcpyFlags, DAG, dl);
6913 continue;
6914 }
6915
6916 // Initialize the final register residue.
6917 // Any residue that occupies the final by-val arg register must be
6918 // left-justified on AIX. Loads must be a power-of-2 size and cannot be
6919 // larger than the ByValSize. For example: a 7 byte by-val arg requires 4,
6920 // 2 and 1 byte loads.
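      // Worked example for the 7 byte case on a 64-bit target: loads of
      // i32 @ 0, i16 @ 4 and i8 @ 6 are shifted left by 32, 16 and 8 bits
      // respectively and OR'ed together, leaving the 7 bytes left-justified
      // in the final argument register.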
6921 const unsigned ResidueBytes = ByValSize % PtrByteSize;
6922 assert(ResidueBytes != 0 && LoadOffset + PtrByteSize > ByValSize &&
6923 "Unexpected register residue for by-value argument.");
6924 SDValue ResidueVal;
6925 for (unsigned Bytes = 0; Bytes != ResidueBytes;) {
6926 const unsigned N = PowerOf2Floor(ResidueBytes - Bytes);
6927 const MVT VT =
6928 N == 1 ? MVT::i8
6929 : ((N == 2) ? MVT::i16 : (N == 4 ? MVT::i32 : MVT::i64));
6930 SDValue Load = GetLoad(VT, LoadOffset);
6931 MemOpChains.push_back(Load.getValue(1));
6932 LoadOffset += N;
6933 Bytes += N;
6934
6935      // By-val arguments are passed left-justified in registers.
6936 // Every load here needs to be shifted, otherwise a full register load
6937 // should have been used.
6938 assert(PtrVT.getSimpleVT().getSizeInBits() > (Bytes * 8) &&
6939 "Unexpected load emitted during handling of pass-by-value "
6940 "argument.");
6941 unsigned NumSHLBits = PtrVT.getSimpleVT().getSizeInBits() - (Bytes * 8);
6942 EVT ShiftAmountTy =
6943 getShiftAmountTy(Load->getValueType(0), DAG.getDataLayout());
6944 SDValue SHLAmt = DAG.getConstant(NumSHLBits, dl, ShiftAmountTy);
6945 SDValue ShiftedLoad =
6946 DAG.getNode(ISD::SHL, dl, Load.getValueType(), Load, SHLAmt);
6947 ResidueVal = ResidueVal ? DAG.getNode(ISD::OR, dl, PtrVT, ResidueVal,
6948 ShiftedLoad)
6949 : ShiftedLoad;
6950 }
6951
6952 const CCValAssign &ByValVA = ArgLocs[I++];
6953 RegsToPass.push_back(std::make_pair(ByValVA.getLocReg(), ResidueVal));
6954 continue;
6955 }
6956
6957 CCValAssign &VA = ArgLocs[I++];
6958 const MVT LocVT = VA.getLocVT();
6959 const MVT ValVT = VA.getValVT();
6960
6961    if (VA.isMemLoc() && VA.getValVT().isVector())
6962      report_fatal_error(
6963          "passing vector parameters to the stack is unimplemented for AIX");
6964
6965 switch (VA.getLocInfo()) {
6966 default:
6967 report_fatal_error("Unexpected argument extension type.");
6968 case CCValAssign::Full:
6969 break;
6970 case CCValAssign::ZExt:
6971 Arg = DAG.getNode(ISD::ZERO_EXTEND, dl, VA.getLocVT(), Arg);
6972 break;
6973 case CCValAssign::SExt:
6974 Arg = DAG.getNode(ISD::SIGN_EXTEND, dl, VA.getLocVT(), Arg);
6975 break;
6976 }
6977
6978 if (VA.isRegLoc() && !VA.needsCustom()) {
6979 RegsToPass.push_back(std::make_pair(VA.getLocReg(), Arg));
6980 continue;
6981 }
6982
6983 if (VA.isMemLoc()) {
6984 SDValue PtrOff =
6985 DAG.getConstant(VA.getLocMemOffset(), dl, StackPtr.getValueType());
6986 PtrOff = DAG.getNode(ISD::ADD, dl, PtrVT, StackPtr, PtrOff);
6987 MemOpChains.push_back(
6988 DAG.getStore(Chain, dl, Arg, PtrOff, MachinePointerInfo()));
6989
6990 continue;
6991 }
6992
6993 // Custom handling is used for GPR initializations for vararg float
6994 // arguments.
6995 assert(VA.isRegLoc() && VA.needsCustom() && CFlags.IsVarArg &&
6996 ValVT.isFloatingPoint() && LocVT.isInteger() &&
6997 "Unexpected register handling for calling convention.");
6998
6999    SDValue ArgAsInt =
7000        DAG.getBitcast(MVT::getIntegerVT(ValVT.getSizeInBits()), Arg);
7001
7002 if (Arg.getValueType().getStoreSize() == LocVT.getStoreSize())
7003 // f32 in 32-bit GPR
7004 // f64 in 64-bit GPR
7005 RegsToPass.push_back(std::make_pair(VA.getLocReg(), ArgAsInt));
7006 else if (Arg.getValueType().getFixedSizeInBits() <
7007 LocVT.getFixedSizeInBits())
7008 // f32 in 64-bit GPR.
7009 RegsToPass.push_back(std::make_pair(
7010 VA.getLocReg(), DAG.getZExtOrTrunc(ArgAsInt, dl, LocVT)));
7011 else {
7012 // f64 in two 32-bit GPRs
7013 // The 2 GPRs are marked custom and expected to be adjacent in ArgLocs.
7014 assert(Arg.getValueType() == MVT::f64 && CFlags.IsVarArg && !IsPPC64 &&
7015 "Unexpected custom register for argument!");
7016 CCValAssign &GPR1 = VA;
7017 SDValue MSWAsI64 = DAG.getNode(ISD::SRL, dl, MVT::i64, ArgAsInt,
7018 DAG.getConstant(32, dl, MVT::i8));
7019 RegsToPass.push_back(std::make_pair(
7020 GPR1.getLocReg(), DAG.getZExtOrTrunc(MSWAsI64, dl, MVT::i32)));
7021
7022 if (I != E) {
7023 // If only 1 GPR was available, there will only be one custom GPR and
7024 // the argument will also pass in memory.
7025 CCValAssign &PeekArg = ArgLocs[I];
7026        if (PeekArg.isRegLoc() && PeekArg.getValNo() == ValNo) {
7027 assert(PeekArg.needsCustom() && "A second custom GPR is expected.");
7028 CCValAssign &GPR2 = ArgLocs[I++];
7029 RegsToPass.push_back(std::make_pair(
7030 GPR2.getLocReg(), DAG.getZExtOrTrunc(ArgAsInt, dl, MVT::i32)));
7031 }
7032 }
7033 }
7034 }
7035
7036 if (!MemOpChains.empty())
7037 Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOpChains);
7038
7039 // For indirect calls, we need to save the TOC base to the stack for
7040 // restoration after the call.
7041 if (CFlags.IsIndirect) {
7042 assert(!CFlags.IsTailCall && "Indirect tail-calls not supported.");
7043 const MCRegister TOCBaseReg = Subtarget.getTOCPointerRegister();
7044 const MCRegister StackPtrReg = Subtarget.getStackPointerRegister();
7045 const MVT PtrVT = Subtarget.isPPC64() ? MVT::i64 : MVT::i32;
7046 const unsigned TOCSaveOffset =
7047 Subtarget.getFrameLowering()->getTOCSaveOffset();
7048
7049 setUsesTOCBasePtr(DAG);
7050 SDValue Val = DAG.getCopyFromReg(Chain, dl, TOCBaseReg, PtrVT);
7051 SDValue PtrOff = DAG.getIntPtrConstant(TOCSaveOffset, dl);
7052 SDValue StackPtr = DAG.getRegister(StackPtrReg, PtrVT);
7053 SDValue AddPtr = DAG.getNode(ISD::ADD, dl, PtrVT, StackPtr, PtrOff);
7054 Chain = DAG.getStore(
7055 Val.getValue(1), dl, Val, AddPtr,
7056 MachinePointerInfo::getStack(DAG.getMachineFunction(), TOCSaveOffset));
7057 }
7058
7059 // Build a sequence of copy-to-reg nodes chained together with token chain
7060 // and flag operands which copy the outgoing args into the appropriate regs.
7061 SDValue InFlag;
7062 for (auto Reg : RegsToPass) {
7063 Chain = DAG.getCopyToReg(Chain, dl, Reg.first, Reg.second, InFlag);
7064 InFlag = Chain.getValue(1);
7065 }
7066
7067 const int SPDiff = 0;
7068 return FinishCall(CFlags, dl, DAG, RegsToPass, InFlag, Chain, CallSeqStart,
7069 Callee, SPDiff, NumBytes, Ins, InVals, CB);
7070}
7071
7072bool
7073PPCTargetLowering::CanLowerReturn(CallingConv::ID CallConv,
7074 MachineFunction &MF, bool isVarArg,
7075                                  const SmallVectorImpl<ISD::OutputArg> &Outs,
7076                                  LLVMContext &Context) const {
7077  SmallVector<CCValAssign, 16> RVLocs;
7078  CCState CCInfo(CallConv, isVarArg, MF, RVLocs, Context);
7079 return CCInfo.CheckReturn(
7080      Outs, (Subtarget.isSVR4ABI() && CallConv == CallingConv::Cold)
7081                ? RetCC_PPC_Cold
7082                : RetCC_PPC);
7083}
7084
7085SDValue
7086PPCTargetLowering::LowerReturn(SDValue Chain, CallingConv::ID CallConv,
7087                               bool isVarArg,
7088                               const SmallVectorImpl<ISD::OutputArg> &Outs,
7089                               const SmallVectorImpl<SDValue> &OutVals,
7090                               const SDLoc &dl, SelectionDAG &DAG) const {
7091  SmallVector<CCValAssign, 16> RVLocs;
7092  CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(), RVLocs,
7093 *DAG.getContext());
7094 CCInfo.AnalyzeReturn(Outs,
7095                       (Subtarget.isSVR4ABI() && CallConv == CallingConv::Cold)
7096                           ? RetCC_PPC_Cold
7097                           : RetCC_PPC);
7098
7099 SDValue Flag;
7100 SmallVector<SDValue, 4> RetOps(1, Chain);
7101
7102 // Copy the result values into the output registers.
7103 for (unsigned i = 0, RealResIdx = 0; i != RVLocs.size(); ++i, ++RealResIdx) {
7104 CCValAssign &VA = RVLocs[i];
7105 assert(VA.isRegLoc() && "Can only return in registers!");
7106
7107 SDValue Arg = OutVals[RealResIdx];
7108
7109 switch (VA.getLocInfo()) {
7110 default: llvm_unreachable("Unknown loc info!");
7111 case CCValAssign::Full: break;
7112 case CCValAssign::AExt:
7113 Arg = DAG.getNode(ISD::ANY_EXTEND, dl, VA.getLocVT(), Arg);
7114 break;
7115 case CCValAssign::ZExt:
7116 Arg = DAG.getNode(ISD::ZERO_EXTEND, dl, VA.getLocVT(), Arg);
7117 break;
7118 case CCValAssign::SExt:
7119 Arg = DAG.getNode(ISD::SIGN_EXTEND, dl, VA.getLocVT(), Arg);
7120 break;
7121 }
7122 if (Subtarget.hasSPE() && VA.getLocVT() == MVT::f64) {
7123 bool isLittleEndian = Subtarget.isLittleEndian();
7124 // Legalize ret f64 -> ret 2 x i32.
7125      SDValue SVal =
7126          DAG.getNode(PPCISD::EXTRACT_SPE, dl, MVT::i32, Arg,
7127                      DAG.getIntPtrConstant(isLittleEndian ? 0 : 1, dl));
7128 Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(), SVal, Flag);
7129 RetOps.push_back(DAG.getRegister(VA.getLocReg(), VA.getLocVT()));
7130 SVal = DAG.getNode(PPCISD::EXTRACT_SPE, dl, MVT::i32, Arg,
7131 DAG.getIntPtrConstant(isLittleEndian ? 1 : 0, dl));
7132 Flag = Chain.getValue(1);
7133 VA = RVLocs[++i]; // skip ahead to next loc
7134 Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(), SVal, Flag);
7135 } else
7136 Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(), Arg, Flag);
7137 Flag = Chain.getValue(1);
7138 RetOps.push_back(DAG.getRegister(VA.getLocReg(), VA.getLocVT()));
7139 }
7140
7141 RetOps[0] = Chain; // Update chain.
7142
7143 // Add the flag if we have it.
7144 if (Flag.getNode())
7145 RetOps.push_back(Flag);
7146
7147 return DAG.getNode(PPCISD::RET_FLAG, dl, MVT::Other, RetOps);
7148}
7149
7150SDValue
7151PPCTargetLowering::LowerGET_DYNAMIC_AREA_OFFSET(SDValue Op,
7152 SelectionDAG &DAG) const {
7153 SDLoc dl(Op);
7154
7155 // Get the correct type for integers.
7156 EVT IntVT = Op.getValueType();
7157
7158 // Get the inputs.
7159 SDValue Chain = Op.getOperand(0);
7160 SDValue FPSIdx = getFramePointerFrameIndex(DAG);
7161 // Build a DYNAREAOFFSET node.
7162 SDValue Ops[2] = {Chain, FPSIdx};
7163 SDVTList VTs = DAG.getVTList(IntVT);
7164 return DAG.getNode(PPCISD::DYNAREAOFFSET, dl, VTs, Ops);
7165}
7166
7167SDValue PPCTargetLowering::LowerSTACKRESTORE(SDValue Op,
7168 SelectionDAG &DAG) const {
7169 // When we pop the dynamic allocation we need to restore the SP link.
7170 SDLoc dl(Op);
7171
7172 // Get the correct type for pointers.
7173 EVT PtrVT = getPointerTy(DAG.getDataLayout());
7174
7175 // Construct the stack pointer operand.
7176 bool isPPC64 = Subtarget.isPPC64();
7177 unsigned SP = isPPC64 ? PPC::X1 : PPC::R1;
7178 SDValue StackPtr = DAG.getRegister(SP, PtrVT);
7179
7180 // Get the operands for the STACKRESTORE.
7181 SDValue Chain = Op.getOperand(0);
7182 SDValue SaveSP = Op.getOperand(1);
7183
7184 // Load the old link SP.
7185 SDValue LoadLinkSP =
7186 DAG.getLoad(PtrVT, dl, Chain, StackPtr, MachinePointerInfo());
7187
7188 // Restore the stack pointer.
7189 Chain = DAG.getCopyToReg(LoadLinkSP.getValue(1), dl, SP, SaveSP);
7190
7191 // Store the old link SP.
7192 return DAG.getStore(Chain, dl, LoadLinkSP, StackPtr, MachinePointerInfo());
7193}
7194
7195SDValue PPCTargetLowering::getReturnAddrFrameIndex(SelectionDAG &DAG) const {
7196  MachineFunction &MF = DAG.getMachineFunction();
7197  bool isPPC64 = Subtarget.isPPC64();
7198 EVT PtrVT = getPointerTy(MF.getDataLayout());
7199
7200  // Get the current return address save index. The users of this index will
7201  // be primarily DYNALLOC instructions.
7202  PPCFunctionInfo *FI = MF.getInfo<PPCFunctionInfo>();
7203  int RASI = FI->getReturnAddrSaveIndex();
7204
7205  // If the return address save index hasn't been defined yet.
7206  if (!RASI) {
7207    // Find out the fixed offset of the return address save area.
7208    int LROffset = Subtarget.getFrameLowering()->getReturnSaveOffset();
7209    // Allocate the frame index for the return address save area.
7210 RASI = MF.getFrameInfo().CreateFixedObject(isPPC64? 8 : 4, LROffset, false);
7211 // Save the result.
7212 FI->setReturnAddrSaveIndex(RASI);
7213 }
7214 return DAG.getFrameIndex(RASI, PtrVT);
7215}
7216
7217SDValue
7218PPCTargetLowering::getFramePointerFrameIndex(SelectionDAG & DAG) const {
7219  MachineFunction &MF = DAG.getMachineFunction();
7220  bool isPPC64 = Subtarget.isPPC64();
7221 EVT PtrVT = getPointerTy(MF.getDataLayout());
7222
7223  // Get the current frame pointer save index. The users of this index will
7224  // be primarily DYNALLOC instructions.
7225  PPCFunctionInfo *FI = MF.getInfo<PPCFunctionInfo>();
7226  int FPSI = FI->getFramePointerSaveIndex();
7227
7228 // If the frame pointer save index hasn't been defined yet.
7229 if (!FPSI) {
7230    // Find out the fixed offset of the frame pointer save area.
7231    int FPOffset = Subtarget.getFrameLowering()->getFramePointerSaveOffset();
7232    // Allocate the frame index for the frame pointer save area.
7233 FPSI = MF.getFrameInfo().CreateFixedObject(isPPC64? 8 : 4, FPOffset, true);
7234 // Save the result.
7235 FI->setFramePointerSaveIndex(FPSI);
7236 }
7237 return DAG.getFrameIndex(FPSI, PtrVT);
7238}
7239
7240SDValue PPCTargetLowering::LowerDYNAMIC_STACKALLOC(SDValue Op,
7241                                                   SelectionDAG &DAG) const {
7242  MachineFunction &MF = DAG.getMachineFunction();
7243  // Get the inputs.
7244 SDValue Chain = Op.getOperand(0);
7245 SDValue Size = Op.getOperand(1);
7246 SDLoc dl(Op);
7247
7248 // Get the correct type for pointers.
7249 EVT PtrVT = getPointerTy(DAG.getDataLayout());
7250 // Negate the size.
7251 SDValue NegSize = DAG.getNode(ISD::SUB, dl, PtrVT,
7252 DAG.getConstant(0, dl, PtrVT), Size);
7253 // Construct a node for the frame pointer save index.
7254 SDValue FPSIdx = getFramePointerFrameIndex(DAG);
7255 SDValue Ops[3] = { Chain, NegSize, FPSIdx };
7256 SDVTList VTs = DAG.getVTList(PtrVT, MVT::Other);
7257 if (hasInlineStackProbe(MF))
7258 return DAG.getNode(PPCISD::PROBED_ALLOCA, dl, VTs, Ops);
7259 return DAG.getNode(PPCISD::DYNALLOC, dl, VTs, Ops);
7260}
7261
7262SDValue PPCTargetLowering::LowerEH_DWARF_CFA(SDValue Op,
7263                                             SelectionDAG &DAG) const {
7264  MachineFunction &MF = DAG.getMachineFunction();
7265
7266 bool isPPC64 = Subtarget.isPPC64();
7267 EVT PtrVT = getPointerTy(DAG.getDataLayout());
7268
7269 int FI = MF.getFrameInfo().CreateFixedObject(isPPC64 ? 8 : 4, 0, false);
7270 return DAG.getFrameIndex(FI, PtrVT);
7271}
7272
7273SDValue PPCTargetLowering::lowerEH_SJLJ_SETJMP(SDValue Op,
7274 SelectionDAG &DAG) const {
7275 SDLoc DL(Op);
7276  return DAG.getNode(PPCISD::EH_SJLJ_SETJMP, DL,
7277                     DAG.getVTList(MVT::i32, MVT::Other),
7278                     Op.getOperand(0), Op.getOperand(1));
7279}
7280
7281SDValue PPCTargetLowering::lowerEH_SJLJ_LONGJMP(SDValue Op,
7282 SelectionDAG &DAG) const {
7283 SDLoc DL(Op);
7284  return DAG.getNode(PPCISD::EH_SJLJ_LONGJMP, DL, MVT::Other,
7285                     Op.getOperand(0), Op.getOperand(1));
7286}
7287
7288SDValue PPCTargetLowering::LowerLOAD(SDValue Op, SelectionDAG &DAG) const {
7289 if (Op.getValueType().isVector())
7290 return LowerVectorLoad(Op, DAG);
7291
7292 assert(Op.getValueType() == MVT::i1 &&
7293 "Custom lowering only for i1 loads");
7294
7295 // First, load 8 bits into 32 bits, then truncate to 1 bit.
7296
7297 SDLoc dl(Op);
7298  LoadSDNode *LD = cast<LoadSDNode>(Op.getNode());
7299
7300 SDValue Chain = LD->getChain();
7301 SDValue BasePtr = LD->getBasePtr();
7302 MachineMemOperand *MMO = LD->getMemOperand();
7303
7304 SDValue NewLD =
7305 DAG.getExtLoad(ISD::EXTLOAD, dl, getPointerTy(DAG.getDataLayout()), Chain,
7306 BasePtr, MVT::i8, MMO);
7307 SDValue Result = DAG.getNode(ISD::TRUNCATE, dl, MVT::i1, NewLD);
7308
7309 SDValue Ops[] = { Result, SDValue(NewLD.getNode(), 1) };
7310 return DAG.getMergeValues(Ops, dl);
7311}
7312
7313SDValue PPCTargetLowering::LowerSTORE(SDValue Op, SelectionDAG &DAG) const {
7314 if (Op.getOperand(1).getValueType().isVector())
7315 return LowerVectorStore(Op, DAG);
7316
7317 assert(Op.getOperand(1).getValueType() == MVT::i1 &&
7318 "Custom lowering only for i1 stores");
7319
7320 // First, zero extend to 32 bits, then use a truncating store to 8 bits.
7321
7322 SDLoc dl(Op);
7323  StoreSDNode *ST = cast<StoreSDNode>(Op.getNode());
7324
7325 SDValue Chain = ST->getChain();
7326 SDValue BasePtr = ST->getBasePtr();
7327 SDValue Value = ST->getValue();
7328 MachineMemOperand *MMO = ST->getMemOperand();
7329
7330  Value = DAG.getNode(ISD::ZERO_EXTEND, dl, getPointerTy(DAG.getDataLayout()),
7331                      Value);
7332 return DAG.getTruncStore(Chain, dl, Value, BasePtr, MVT::i8, MMO);
7333}
7334
7335// FIXME: Remove this once the ANDI glue bug is fixed:
7336SDValue PPCTargetLowering::LowerTRUNCATE(SDValue Op, SelectionDAG &DAG) const {
7337 assert(Op.getValueType() == MVT::i1 &&
7338 "Custom lowering only for i1 results");
7339
7340 SDLoc DL(Op);
7341 return DAG.getNode(PPCISD::ANDI_rec_1_GT_BIT, DL, MVT::i1, Op.getOperand(0));
7342}
7343
7344SDValue PPCTargetLowering::LowerTRUNCATEVector(SDValue Op,
7345 SelectionDAG &DAG) const {
7346
7347 // Implements a vector truncate that fits in a vector register as a shuffle.
7348 // We want to legalize vector truncates down to where the source fits in
7349 // a vector register (and target is therefore smaller than vector register
7350 // size). At that point legalization will try to custom lower the sub-legal
7351 // result and get here - where we can contain the truncate as a single target
7352 // operation.
7353
7354 // For example a trunc <2 x i16> to <2 x i8> could be visualized as follows:
7355 // <MSB1|LSB1, MSB2|LSB2> to <LSB1, LSB2>
7356 //
7357 // We will implement it for big-endian ordering as this (where x denotes
7358 // undefined):
7359 // < MSB1|LSB1, MSB2|LSB2, uu, uu, uu, uu, uu, uu> to
7360 // < LSB1, LSB2, u, u, u, u, u, u, u, u, u, u, u, u, u, u>
7361 //
7362 // The same operation in little-endian ordering will be:
7363 // <uu, uu, uu, uu, uu, uu, LSB2|MSB2, LSB1|MSB1> to
7364 // <u, u, u, u, u, u, u, u, u, u, u, u, u, u, LSB2, LSB1>
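// For example, truncating v2i16 to v2i8 on a little-endian subtarget:
// SizeMult = 32/16 = 2, so the mask keeps byte indices {0, 2} (the low byte
// of each halfword) and fills the remaining 14 lanes of the v16i8 shuffle
// mask with indices into the undef second operand.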
7365
7366 EVT TrgVT = Op.getValueType();
7367 assert(TrgVT.isVector() && "Vector type expected.");
7368 unsigned TrgNumElts = TrgVT.getVectorNumElements();
7369 EVT EltVT = TrgVT.getVectorElementType();
7370 if (!isOperationCustom(Op.getOpcode(), TrgVT) ||
7371 TrgVT.getSizeInBits() > 128 || !isPowerOf2_32(TrgNumElts) ||
7372 !isPowerOf2_32(EltVT.getSizeInBits()))
7373 return SDValue();
7374
7375 SDValue N1 = Op.getOperand(0);
7376 EVT SrcVT = N1.getValueType();
7377 unsigned SrcSize = SrcVT.getSizeInBits();
7378  if (SrcSize > 256 ||
7379      !isPowerOf2_32(SrcVT.getVectorNumElements()) ||
7380      !isPowerOf2_32(SrcVT.getVectorElementType().getSizeInBits()))
7381    return SDValue();
7382 if (SrcSize == 256 && SrcVT.getVectorNumElements() < 2)
7383 return SDValue();
7384
7385 unsigned WideNumElts = 128 / EltVT.getSizeInBits();
7386 EVT WideVT = EVT::getVectorVT(*DAG.getContext(), EltVT, WideNumElts);
7387
7388 SDLoc DL(Op);
7389 SDValue Op1, Op2;
7390 if (SrcSize == 256) {
7391 EVT VecIdxTy = getVectorIdxTy(DAG.getDataLayout());
7392    EVT SplitVT =
7393        N1.getValueType().getHalfNumVectorElementsVT(*DAG.getContext());
7394    unsigned SplitNumElts = SplitVT.getVectorNumElements();
7395 Op1 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, SplitVT, N1,
7396 DAG.getConstant(0, DL, VecIdxTy));
7397 Op2 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, SplitVT, N1,
7398 DAG.getConstant(SplitNumElts, DL, VecIdxTy));
7399 }
7400 else {
7401 Op1 = SrcSize == 128 ? N1 : widenVec(DAG, N1, DL);
7402 Op2 = DAG.getUNDEF(WideVT);
7403 }
7404
7405 // First list the elements we want to keep.
7406 unsigned SizeMult = SrcSize / TrgVT.getSizeInBits();
7407 SmallVector<int, 16> ShuffV;
7408 if (Subtarget.isLittleEndian())
7409 for (unsigned i = 0; i < TrgNumElts; ++i)
7410 ShuffV.push_back(i * SizeMult);
7411 else
7412 for (unsigned i = 1; i <= TrgNumElts; ++i)
7413 ShuffV.push_back(i * SizeMult - 1);
7414
7415 // Populate the remaining elements with undefs.
7416 for (unsigned i = TrgNumElts; i < WideNumElts; ++i)
7417 // ShuffV.push_back(i + WideNumElts);
7418 ShuffV.push_back(WideNumElts + 1);
7419
7420 Op1 = DAG.getNode(ISD::BITCAST, DL, WideVT, Op1);
7421 Op2 = DAG.getNode(ISD::BITCAST, DL, WideVT, Op2);
7422 return DAG.getVectorShuffle(WideVT, DL, Op1, Op2, ShuffV);
7423}
7424
7425/// LowerSELECT_CC - Lower floating-point select_cc's into the fsel
7426/// instruction when possible.
7427SDValue PPCTargetLowering::LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const {
7428 // Not FP, or using SPE? Not a fsel.
7429 if (!Op.getOperand(0).getValueType().isFloatingPoint() ||
7430 !Op.getOperand(2).getValueType().isFloatingPoint() || Subtarget.hasSPE())
7431 return Op;
7432
7433 ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(4))->get();
7434
7435 EVT ResVT = Op.getValueType();
7436 EVT CmpVT = Op.getOperand(0).getValueType();
7437 SDValue LHS = Op.getOperand(0), RHS = Op.getOperand(1);
7438 SDValue TV = Op.getOperand(2), FV = Op.getOperand(3);
7439 SDLoc dl(Op);
7440 SDNodeFlags Flags = Op.getNode()->getFlags();
7441
7442 // We have xsmaxcdp/xsmincdp which are OK to emit even in the
7443 // presence of infinities.
7444 if (Subtarget.hasP9Vector() && LHS == TV && RHS == FV) {
7445 switch (CC) {
7446 default:
7447 break;
7448 case ISD::SETOGT:
7449 case ISD::SETGT:
7450 return DAG.getNode(PPCISD::XSMAXCDP, dl, Op.getValueType(), LHS, RHS);
7451 case ISD::SETOLT:
7452 case ISD::SETLT:
7453 return DAG.getNode(PPCISD::XSMINCDP, dl, Op.getValueType(), LHS, RHS);
7454 }
7455 }
7456
7457 // We might be able to do better than this under some circumstances, but in
7458 // general, fsel-based lowering of select is a finite-math-only optimization.
7459 // For more information, see section F.3 of the 2.06 ISA specification.
7460  // (With ISA 3.0, the xsmaxcdp/xsmincdp cases were already handled above.)
7461 if ((!DAG.getTarget().Options.NoInfsFPMath && !Flags.hasNoInfs()) ||
7462 (!DAG.getTarget().Options.NoNaNsFPMath && !Flags.hasNoNaNs()))
7463 return Op;
7464
7465 // If the RHS of the comparison is a 0.0, we don't need to do the
7466 // subtraction at all.
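  // For example, (select_cc f64:%a, 0.0, %t, %f, setge) lowers directly to
  // (fsel %a, %t, %f), since fsel picks its second operand when the first is
  // greater than or equal to zero.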
7467 SDValue Sel1;
7468 if (isFloatingPointZero(RHS))
7469 switch (CC) {
7470 default: break; // SETUO etc aren't handled by fsel.
7471 case ISD::SETNE:
7472 std::swap(TV, FV);
7473      LLVM_FALLTHROUGH;
7474    case ISD::SETEQ:
7475 if (LHS.getValueType() == MVT::f32) // Comparison is always 64-bits
7476 LHS = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, LHS);
7477 Sel1 = DAG.getNode(PPCISD::FSEL, dl, ResVT, LHS, TV, FV);
7478 if (Sel1.getValueType() == MVT::f32) // Comparison is always 64-bits
7479 Sel1 = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Sel1);
7480 return DAG.getNode(PPCISD::FSEL, dl, ResVT,
7481 DAG.getNode(ISD::FNEG, dl, MVT::f64, LHS), Sel1, FV);
7482 case ISD::SETULT:
7483 case ISD::SETLT:
7484 std::swap(TV, FV); // fsel is natively setge, swap operands for setlt
7485      LLVM_FALLTHROUGH;
7486    case ISD::SETOGE:
7487 case ISD::SETGE:
7488 if (LHS.getValueType() == MVT::f32) // Comparison is always 64-bits
7489 LHS = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, LHS);
7490 return DAG.getNode(PPCISD::FSEL, dl, ResVT, LHS, TV, FV);
7491 case ISD::SETUGT:
7492 case ISD::SETGT:
7493      std::swap(TV, FV); // fsel is natively setge, swap operands for setle
7494      LLVM_FALLTHROUGH;
7495    case ISD::SETOLE:
7496 case ISD::SETLE:
7497 if (LHS.getValueType() == MVT::f32) // Comparison is always 64-bits
7498 LHS = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, LHS);
7499 return DAG.getNode(PPCISD::FSEL, dl, ResVT,
7500 DAG.getNode(ISD::FNEG, dl, MVT::f64, LHS), TV, FV);
7501 }
7502
7503 SDValue Cmp;
7504 switch (CC) {
7505 default: break; // SETUO etc aren't handled by fsel.
7506 case ISD::SETNE:
7507 std::swap(TV, FV);
7508      LLVM_FALLTHROUGH;
7509    case ISD::SETEQ:
7510 Cmp = DAG.getNode(ISD::FSUB, dl, CmpVT, LHS, RHS, Flags);
7511 if (Cmp.getValueType() == MVT::f32) // Comparison is always 64-bits
7512 Cmp = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Cmp);
7513 Sel1 = DAG.getNode(PPCISD::FSEL, dl, ResVT, Cmp, TV, FV);
7514 if (Sel1.getValueType() == MVT::f32) // Comparison is always 64-bits
7515 Sel1 = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Sel1);
7516 return DAG.getNode(PPCISD::FSEL, dl, ResVT,
7517 DAG.getNode(ISD::FNEG, dl, MVT::f64, Cmp), Sel1, FV);
7518 case ISD::SETULT:
7519 case ISD::SETLT:
7520 Cmp = DAG.getNode(ISD::FSUB, dl, CmpVT, LHS, RHS, Flags);
7521 if (Cmp.getValueType() == MVT::f32) // Comparison is always 64-bits
7522 Cmp = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Cmp);
7523 return DAG.getNode(PPCISD::FSEL, dl, ResVT, Cmp, FV, TV);
7524 case ISD::SETOGE:
7525 case ISD::SETGE:
7526 Cmp = DAG.getNode(ISD::FSUB, dl, CmpVT, LHS, RHS, Flags);
7527 if (Cmp.getValueType() == MVT::f32) // Comparison is always 64-bits
7528 Cmp = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Cmp);
7529 return DAG.getNode(PPCISD::FSEL, dl, ResVT, Cmp, TV, FV);
7530 case ISD::SETUGT:
7531 case ISD::SETGT:
7532 Cmp = DAG.getNode(ISD::FSUB, dl, CmpVT, RHS, LHS, Flags);
7533 if (Cmp.getValueType() == MVT::f32) // Comparison is always 64-bits
7534 Cmp = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Cmp);
7535 return DAG.getNode(PPCISD::FSEL, dl, ResVT, Cmp, FV, TV);
7536 case ISD::SETOLE:
7537 case ISD::SETLE:
7538 Cmp = DAG.getNode(ISD::FSUB, dl, CmpVT, RHS, LHS, Flags);
7539 if (Cmp.getValueType() == MVT::f32) // Comparison is always 64-bits
7540 Cmp = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Cmp);
7541 return DAG.getNode(PPCISD::FSEL, dl, ResVT, Cmp, TV, FV);
7542 }
7543 return Op;
7544}
7545
7546static unsigned getPPCStrictOpcode(unsigned Opc) {
7547 switch (Opc) {
7548 default:
7549 llvm_unreachable("No strict version of this opcode!");
7550 case PPCISD::FCTIDZ:
7551 return PPCISD::STRICT_FCTIDZ;
7552 case PPCISD::FCTIWZ:
7553 return PPCISD::STRICT_FCTIWZ;
7554  case PPCISD::FCTIDUZ:
7555    return PPCISD::STRICT_FCTIDUZ;
7556  case PPCISD::FCTIWUZ:
7557    return PPCISD::STRICT_FCTIWUZ;
7558  case PPCISD::FCFID:
7559 return PPCISD::STRICT_FCFID;
7560 case PPCISD::FCFIDU:
7561 return PPCISD::STRICT_FCFIDU;
7562 case PPCISD::FCFIDS:
7563 return PPCISD::STRICT_FCFIDS;
7564  case PPCISD::FCFIDUS:
7565    return PPCISD::STRICT_FCFIDUS;
7566  }
7567}
7568
7569static SDValue convertFPToInt(SDValue Op, SelectionDAG &DAG,
7570                              const PPCSubtarget &Subtarget) {
7571 SDLoc dl(Op);
7572 bool IsStrict = Op->isStrictFPOpcode();
7573 bool IsSigned = Op.getOpcode() == ISD::FP_TO_SINT ||
7574 Op.getOpcode() == ISD::STRICT_FP_TO_SINT;
7575
7576 // TODO: Any other flags to propagate?
7577 SDNodeFlags Flags;
7578 Flags.setNoFPExcept(Op->getFlags().hasNoFPExcept());
7579
7580 // For strict nodes, source is the second operand.
7581 SDValue Src = Op.getOperand(IsStrict ? 1 : 0);
7582 SDValue Chain = IsStrict ? Op.getOperand(0) : SDValue();
7584 if (Src.getValueType() == MVT::f32) {
7585 if (IsStrict) {
7586      Src =
7587          DAG.getNode(ISD::STRICT_FP_EXTEND, dl,
7588                      DAG.getVTList(MVT::f64, MVT::Other), {Chain, Src}, Flags);
7589 Chain = Src.getValue(1);
7590 } else
7591 Src = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Src);
7592 }
7593 SDValue Conv;
7594 unsigned Opc = ISD::DELETED_NODE;
7595 switch (Op.getSimpleValueType().SimpleTy) {
7596 default: llvm_unreachable("Unhandled FP_TO_INT type in custom expander!");
7597 case MVT::i32:
7598 Opc = IsSigned ? PPCISD::FCTIWZ
7599 : (Subtarget.hasFPCVT() ? PPCISD::FCTIWUZ : PPCISD::FCTIDZ);
7600 break;
7601 case MVT::i64:
7602 assert((IsSigned || Subtarget.hasFPCVT()) &&
7603 "i64 FP_TO_UINT is supported only with FPCVT");
7604 Opc = IsSigned ? PPCISD::FCTIDZ : PPCISD::FCTIDUZ;
7605 }
7606 if (IsStrict) {
7607 Opc = getPPCStrictOpcode(Opc);
7608 Conv = DAG.getNode(Opc, dl, DAG.getVTList(MVT::f64, MVT::Other),
7609 {Chain, Src}, Flags);
7610 } else {
7611 Conv = DAG.getNode(Opc, dl, MVT::f64, Src);
7612 }
7613 return Conv;
7614}
7615
7616void PPCTargetLowering::LowerFP_TO_INTForReuse(SDValue Op, ReuseLoadInfo &RLI,
7617 SelectionDAG &DAG,
7618 const SDLoc &dl) const {
7619 SDValue Tmp = convertFPToInt(Op, DAG, Subtarget);
7620 bool IsSigned = Op.getOpcode() == ISD::FP_TO_SINT ||
7621 Op.getOpcode() == ISD::STRICT_FP_TO_SINT;
7622 bool IsStrict = Op->isStrictFPOpcode();
7623
7624 // Convert the FP value to an int value through memory.
7625 bool i32Stack = Op.getValueType() == MVT::i32 && Subtarget.hasSTFIWX() &&
7626 (IsSigned || Subtarget.hasFPCVT());
7627 SDValue FIPtr = DAG.CreateStackTemporary(i32Stack ? MVT::i32 : MVT::f64);
7628 int FI = cast<FrameIndexSDNode>(FIPtr)->getIndex();
7629  MachinePointerInfo MPI =
7630      MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FI);
7631
7632 // Emit a store to the stack slot.
7633 SDValue Chain = IsStrict ? Tmp.getValue(1) : DAG.getEntryNode();
7634 Align Alignment(DAG.getEVTAlign(Tmp.getValueType()));
7635 if (i32Stack) {
7636    MachineFunction &MF = DAG.getMachineFunction();
7637    Alignment = Align(4);
7638 MachineMemOperand *MMO =
7639 MF.getMachineMemOperand(MPI, MachineMemOperand::MOStore, 4, Alignment);
7640 SDValue Ops[] = { Chain, Tmp, FIPtr };
7641 Chain = DAG.getMemIntrinsicNode(PPCISD::STFIWX, dl,
7642 DAG.getVTList(MVT::Other), Ops, MVT::i32, MMO);
7643 } else
7644 Chain = DAG.getStore(Chain, dl, Tmp, FIPtr, MPI, Alignment);
7645
7646 // Result is a load from the stack slot. If loading 4 bytes, make sure to
7647 // add in a bias on big endian.
7648 if (Op.getValueType() == MVT::i32 && !i32Stack) {
7649 FIPtr = DAG.getNode(ISD::ADD, dl, FIPtr.getValueType(), FIPtr,
7650 DAG.getConstant(4, dl, FIPtr.getValueType()));
7651 MPI = MPI.getWithOffset(Subtarget.isLittleEndian() ? 0 : 4);
7652 }
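  // For example, on a big-endian subtarget the i32 result of an f64-sized
  // stack slot lives in bytes 4..7, hence the 4 byte bias above; on
  // little-endian it lives in bytes 0..3.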
7653
7654 RLI.Chain = Chain;
7655 RLI.Ptr = FIPtr;
7656 RLI.MPI = MPI;
7657 RLI.Alignment = Alignment;
7658}
7659
7660/// Custom lowers floating point to integer conversions to use
7661/// the direct move instructions available in ISA 2.07 to avoid the
7662/// need for load/store combinations.
7663SDValue PPCTargetLowering::LowerFP_TO_INTDirectMove(SDValue Op,
7664 SelectionDAG &DAG,
7665 const SDLoc &dl) const {
7666 SDValue Conv = convertFPToInt(Op, DAG, Subtarget);
7667 SDValue Mov = DAG.getNode(PPCISD::MFVSR, dl, Op.getValueType(), Conv);
7668 if (Op->isStrictFPOpcode())
7669 return DAG.getMergeValues({Mov, Conv.getValue(1)}, dl);
7670 else
7671 return Mov;
7672}
7673
7674SDValue PPCTargetLowering::LowerFP_TO_INT(SDValue Op, SelectionDAG &DAG,
7675 const SDLoc &dl) const {
7676 bool IsStrict = Op->isStrictFPOpcode();
7677 bool IsSigned = Op.getOpcode() == ISD::FP_TO_SINT ||
7678 Op.getOpcode() == ISD::STRICT_FP_TO_SINT;
7679 SDValue Src = Op.getOperand(IsStrict ? 1 : 0);
7680 EVT SrcVT = Src.getValueType();
7681 EVT DstVT = Op.getValueType();
7682
7683 // FP to INT conversions are legal for f128.
7684 if (SrcVT == MVT::f128)
7685 return Subtarget.hasP9Vector() ? Op : SDValue();
7686
7687 // Expand ppcf128 to i32 by hand for the benefit of llvm-gcc bootstrap on
7688 // PPC (the libcall is not available).
7689 if (SrcVT == MVT::ppcf128) {
7690 if (DstVT == MVT::i32) {
7691 // TODO: Conservatively pass only nofpexcept flag here. Need to check and
7692 // set other fast-math flags to FP operations in both strict and
7693 // non-strict cases. (FP_TO_SINT, FSUB)
7694 SDNodeFlags Flags;
7695 Flags.setNoFPExcept(Op->getFlags().hasNoFPExcept());
7696
7697      if (IsSigned) {
7698        SDValue Lo = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, MVT::f64, Src,
7699                                 DAG.getIntPtrConstant(0, dl));
7700        SDValue Hi = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, MVT::f64, Src,
7701                                 DAG.getIntPtrConstant(1, dl));
7702
7703 // Add the two halves of the long double in round-to-zero mode, and use
7704 // a smaller FP_TO_SINT.
7705        if (IsStrict) {
7706          SDValue Res = DAG.getNode(PPCISD::STRICT_FADDRTZ, dl,
7707                                    DAG.getVTList(MVT::f64, MVT::Other),
7708                                    {Op.getOperand(0), Lo, Hi}, Flags);
7709          return DAG.getNode(ISD::STRICT_FP_TO_SINT, dl,
7710                             DAG.getVTList(MVT::i32, MVT::Other),
7711                             {Res.getValue(1), Res}, Flags);
7712 } else {
7713 SDValue Res = DAG.getNode(PPCISD::FADDRTZ, dl, MVT::f64, Lo, Hi);
7714 return DAG.getNode(ISD::FP_TO_SINT, dl, MVT::i32, Res);
7715 }
7716 } else {
7717 const uint64_t TwoE31[] = {0x41e0000000000000LL, 0};
7718 APFloat APF = APFloat(APFloat::PPCDoubleDouble(), APInt(128, TwoE31));
7719 SDValue Cst = DAG.getConstantFP(APF, dl, SrcVT);
7720 SDValue SignMask = DAG.getConstant(0x80000000, dl, DstVT);
7721 if (IsStrict) {
7722 // Sel = Src < 0x80000000
7723 // FltOfs = select Sel, 0.0, 0x80000000
7724 // IntOfs = select Sel, 0, 0x80000000
7725 // Result = fp_to_sint(Src - FltOfs) ^ IntOfs
7726 SDValue Chain = Op.getOperand(0);
7727 EVT SetCCVT =
7728 getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), SrcVT);
7729 EVT DstSetCCVT =
7730 getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), DstVT);
7731 SDValue Sel = DAG.getSetCC(dl, SetCCVT, Src, Cst, ISD::SETLT,
7732 Chain, true);
7733 Chain = Sel.getValue(1);
7734
7735 SDValue FltOfs = DAG.getSelect(
7736 dl, SrcVT, Sel, DAG.getConstantFP(0.0, dl, SrcVT), Cst);
7737 Sel = DAG.getBoolExtOrTrunc(Sel, dl, DstSetCCVT, DstVT);
7738
7739 SDValue Val = DAG.getNode(ISD::STRICT_FSUB, dl,
7740 DAG.getVTList(SrcVT, MVT::Other),
7741 {Chain, Src, FltOfs}, Flags);
7742 Chain = Val.getValue(1);
7743 SDValue SInt = DAG.getNode(ISD::STRICT_FP_TO_SINT, dl,
7744 DAG.getVTList(DstVT, MVT::Other),
7745 {Chain, Val}, Flags);
7746 Chain = SInt.getValue(1);
7747 SDValue IntOfs = DAG.getSelect(
7748 dl, DstVT, Sel, DAG.getConstant(0, dl, DstVT), SignMask);
7749 SDValue Result = DAG.getNode(ISD::XOR, dl, DstVT, SInt, IntOfs);
7750 return DAG.getMergeValues({Result, Chain}, dl);
7751 } else {
7752 // X>=2^31 ? (int)(X-2^31)+0x80000000 : (int)X
7753 // FIXME: generated code sucks.
7754 SDValue True = DAG.getNode(ISD::FSUB, dl, MVT::ppcf128, Src, Cst);
7755 True = DAG.getNode(ISD::FP_TO_SINT, dl, MVT::i32, True);
7756 True = DAG.getNode(ISD::ADD, dl, MVT::i32, True, SignMask);
7757 SDValue False = DAG.getNode(ISD::FP_TO_SINT, dl, MVT::i32, Src);
7758 return DAG.getSelectCC(dl, Src, Cst, True, False, ISD::SETGE);
7759 }
7760 }
7761 }
7762
7763 return SDValue();
7764 }
7765
7766 if (Subtarget.hasDirectMove() && Subtarget.isPPC64())
7767 return LowerFP_TO_INTDirectMove(Op, DAG, dl);
7768
7769 ReuseLoadInfo RLI;
7770 LowerFP_TO_INTForReuse(Op, RLI, DAG, dl);
7771
7772 return DAG.getLoad(Op.getValueType(), dl, RLI.Chain, RLI.Ptr, RLI.MPI,
7773 RLI.Alignment, RLI.MMOFlags(), RLI.AAInfo, RLI.Ranges);
7774}
7775
7776// We're trying to insert a regular store, S, and then a load, L. If the
7777// incoming value, O, is a load, we might just be able to have our load use the
7778// address used by O. However, we don't know if anything else will store to
7779// that address before we can load from it. To prevent this situation, we need
7780// to insert our load, L, into the chain as a peer of O. To do this, we give L
7781// the same chain operand as O, we create a token factor from the chain results
7782// of O and L, and we replace all uses of O's chain result with that token
7783// factor (see spliceIntoChain below for this last part).
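// As a sketch: if O = load [addr] and we emit L = load [addr] with O's chain
// operand, spliceIntoChain builds TF = TokenFactor(O.chain, L.chain) and
// redirects all former users of O's chain result to TF, so no store can be
// scheduled between the two loads.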
7784bool PPCTargetLowering::canReuseLoadAddress(SDValue Op, EVT MemVT,
7785 ReuseLoadInfo &RLI,
7786 SelectionDAG &DAG,
7787 ISD::LoadExtType ET) const {
7788 // Conservatively skip reusing for constrained FP nodes.
7789 if (Op->isStrictFPOpcode())
7790 return false;
7791
7792 SDLoc dl(Op);
7793 bool ValidFPToUint = Op.getOpcode() == ISD::FP_TO_UINT &&
7794 (Subtarget.hasFPCVT() || Op.getValueType() == MVT::i32);
7795 if (ET == ISD::NON_EXTLOAD &&
7796 (ValidFPToUint || Op.getOpcode() == ISD::FP_TO_SINT) &&
7797 isOperationLegalOrCustom(Op.getOpcode(),
7798 Op.getOperand(0).getValueType())) {
7799
7800 LowerFP_TO_INTForReuse(Op, RLI, DAG, dl);
7801 return true;
7802 }
7803
7804  LoadSDNode *LD = dyn_cast<LoadSDNode>(Op);
7805  if (!LD || LD->getExtensionType() != ET || LD->isVolatile() ||
7806 LD->isNonTemporal())
7807 return false;
7808 if (LD->getMemoryVT() != MemVT)
7809 return false;
7810
7811  // If the result of the load is an illegal type, then we can't build a
7812  // valid chain for reuse since the legalised loads and the token factor
7813  // node that ties the legalised loads together use a different output
7814  // chain than the illegal load.
7815 if (!isTypeLegal(LD->getValueType(0)))
7816 return false;
7817
7818 RLI.Ptr = LD->getBasePtr();
7819 if (LD->isIndexed() && !LD->getOffset().isUndef()) {
7820 assert(LD->getAddressingMode() == ISD::PRE_INC &&
7821 "Non-pre-inc AM on PPC?");
7822 RLI.Ptr = DAG.getNode(ISD::ADD, dl, RLI.Ptr.getValueType(), RLI.Ptr,
7823 LD->getOffset());
7824 }
7825
7826 RLI.Chain = LD->getChain();
7827 RLI.MPI = LD->getPointerInfo();
7828 RLI.IsDereferenceable = LD->isDereferenceable();
7829 RLI.IsInvariant = LD->isInvariant();
7830 RLI.Alignment = LD->getAlign();
7831 RLI.AAInfo = LD->getAAInfo();
7832 RLI.Ranges = LD->getRanges();
7833
7834 RLI.ResChain = SDValue(LD, LD->isIndexed() ? 2 : 1);
7835 return true;
7836}
7837
7838// Given the head of the old chain, ResChain, insert a token factor containing
7839// it and NewResChain, and make users of ResChain now be users of that token
7840// factor.
7841// TODO: Remove and use DAG::makeEquivalentMemoryOrdering() instead.
7842void PPCTargetLowering::spliceIntoChain(SDValue ResChain,
7843 SDValue NewResChain,
7844 SelectionDAG &DAG) const {
7845 if (!ResChain)
7846 return;
7847
7848 SDLoc dl(NewResChain);
7849
7850  SDValue TF = DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
7851                           NewResChain, DAG.getUNDEF(MVT::Other));
7852 assert(TF.getNode() != NewResChain.getNode() &&
7853 "A new TF really is required here");
7854
7855 DAG.ReplaceAllUsesOfValueWith(ResChain, TF);
7856 DAG.UpdateNodeOperands(TF.getNode(), ResChain, NewResChain);
7857}
7858
7859/// Analyze the profitability of a direct move:
7860/// prefer a float load over an int load plus direct move
7861/// when there is no integer use of the int load.
7862bool PPCTargetLowering::directMoveIsProfitable(const SDValue &Op) const {
7863 SDNode *Origin = Op.getOperand(0).getNode();
7864 if (Origin->getOpcode() != ISD::LOAD)
7865 return true;
7866
7867  // If there is no LXSIBZX/LXSIHZX, as on Power8,
7868  // prefer a direct move if the memory size is 1 or 2 bytes.
7869 MachineMemOperand *MMO = cast<LoadSDNode>(Origin)->getMemOperand();
7870 if (!Subtarget.hasP9Vector() && MMO->getSize() <= 2)
7871 return true;
7872
7873 for (SDNode::use_iterator UI = Origin->use_begin(),
7874 UE = Origin->use_end();
7875 UI != UE; ++UI) {
7876
7877 // Only look at the users of the loaded value.
7878 if (UI.getUse().get().getResNo() != 0)
7879 continue;
7880
7881 if (UI->getOpcode() != ISD::SINT_TO_FP &&
7882 UI->getOpcode() != ISD::UINT_TO_FP &&
7883 UI->getOpcode() != ISD::STRICT_SINT_TO_FP &&
7884 UI->getOpcode() != ISD::STRICT_UINT_TO_FP)
7885 return true;
7886 }
7887
7888 return false;
7889}
7890
7891static SDValue convertIntToFP(SDValue Op, SDValue Src, SelectionDAG &DAG,
7892                              const PPCSubtarget &Subtarget,
7893 SDValue Chain = SDValue()) {
7894 bool IsSigned = Op.getOpcode() == ISD::SINT_TO_FP ||
7895 Op.getOpcode() == ISD::STRICT_SINT_TO_FP;
7896 SDLoc dl(Op);
7897
7898 // TODO: Any other flags to propagate?
7899 SDNodeFlags Flags;
7900 Flags.setNoFPExcept(Op->getFlags().hasNoFPExcept());
7901
7902 // If we have FCFIDS, then use it when converting to single-precision.
7903 // Otherwise, convert to double-precision and then round.
7904 bool IsSingle = Op.getValueType() == MVT::f32 && Subtarget.hasFPCVT();
7905 unsigned ConvOpc = IsSingle ? (IsSigned ? PPCISD::FCFIDS : PPCISD::FCFIDUS)
7906 : (IsSigned ? PPCISD::FCFID : PPCISD::FCFIDU);
7907 EVT ConvTy = IsSingle ? MVT::f32 : MVT::f64;
7908 if (Op->isStrictFPOpcode()) {
7909 if (!Chain)
7910 Chain = Op.getOperand(0);
7911 return DAG.getNode(getPPCStrictOpcode(ConvOpc), dl,
7912 DAG.getVTList(ConvTy, MVT::Other), {Chain, Src}, Flags);
7913 } else
7914 return DAG.getNode(ConvOpc, dl, ConvTy, Src);
7915}
7916
7917/// Custom lowers integer to floating point conversions to use
7918/// the direct move instructions available in ISA 2.07 to avoid the
7919/// need for load/store combinations.
7920SDValue PPCTargetLowering::LowerINT_TO_FPDirectMove(SDValue Op,
7921 SelectionDAG &DAG,
7922 const SDLoc &dl) const {
7923 assert((Op.getValueType() == MVT::f32 ||
7924 Op.getValueType() == MVT::f64) &&
7925 "Invalid floating point type as target of conversion");
7926 assert(Subtarget.hasFPCVT() &&
7927 "Int to FP conversions with direct moves require FPCVT");
7928 SDValue Src = Op.getOperand(Op->isStrictFPOpcode() ? 1 : 0);
7929 bool WordInt = Src.getSimpleValueType().SimpleTy == MVT::i32;
7930 bool Signed = Op.getOpcode() == ISD::SINT_TO_FP ||
7931 Op.getOpcode() == ISD::STRICT_SINT_TO_FP;
7932 unsigned MovOpc = (WordInt && !Signed) ? PPCISD::MTVSRZ : PPCISD::MTVSRA;
7933 SDValue Mov = DAG.getNode(MovOpc, dl, MVT::f64, Src);
7934 return convertIntToFP(Op, Mov, DAG, Subtarget);
7935}
7936
7937static SDValue widenVec(SelectionDAG &DAG, SDValue Vec, const SDLoc &dl) {
7938
7939 EVT VecVT = Vec.getValueType();
7940 assert(VecVT.isVector() && "Expected a vector type.");
7941 assert(VecVT.getSizeInBits() < 128 && "Vector is already full width.");
7942
7943 EVT EltVT = VecVT.getVectorElementType();
7944 unsigned WideNumElts = 128 / EltVT.getSizeInBits();
7945 EVT WideVT = EVT::getVectorVT(*DAG.getContext(), EltVT, WideNumElts);
7946
7947 unsigned NumConcat = WideNumElts / VecVT.getVectorNumElements();
7948 SmallVector<SDValue, 16> Ops(NumConcat);
7949 Ops[0] = Vec;
7950 SDValue UndefVec = DAG.getUNDEF(VecVT);
7951 for (unsigned i = 1; i < NumConcat; ++i)
7952 Ops[i] = UndefVec;
7953
7954 return DAG.getNode(ISD::CONCAT_VECTORS, dl, WideVT, Ops);
7955}
7956
7957SDValue PPCTargetLowering::LowerINT_TO_FPVector(SDValue Op, SelectionDAG &DAG,
7958 const SDLoc &dl) const {
7959 bool IsStrict = Op->isStrictFPOpcode();
7960 unsigned Opc = Op.getOpcode();
7961 SDValue Src = Op.getOperand(IsStrict ? 1 : 0);
7962  assert((Opc == ISD::UINT_TO_FP || Opc == ISD::SINT_TO_FP ||
7963          Opc == ISD::STRICT_UINT_TO_FP || Opc == ISD::STRICT_SINT_TO_FP) &&
7964         "Unexpected conversion type");
7965 assert((Op.getValueType() == MVT::v2f64 || Op.getValueType() == MVT::v4f32) &&
7966 "Supports conversions to v2f64/v4f32 only.");
7967
7968 // TODO: Any other flags to propagate?
7969 SDNodeFlags Flags;
7970 Flags.setNoFPExcept(Op->getFlags().hasNoFPExcept());
7971
7972 bool SignedConv = Opc == ISD::SINT_TO_FP || Opc == ISD::STRICT_SINT_TO_FP;
7973 bool FourEltRes = Op.getValueType() == MVT::v4f32;
7974
7975 SDValue Wide = widenVec(DAG, Src, dl);
7976 EVT WideVT = Wide.getValueType();
7977 unsigned WideNumElts = WideVT.getVectorNumElements();
7978 MVT IntermediateVT = FourEltRes ? MVT::v4i32 : MVT::v2i64;
7979
7980 SmallVector<int, 16> ShuffV;
7981 for (unsigned i = 0; i < WideNumElts; ++i)
7982 ShuffV.push_back(i + WideNumElts);
7983
7984 int Stride = FourEltRes ? WideNumElts / 4 : WideNumElts / 2;
7985 int SaveElts = FourEltRes ? 4 : 2;
7986 if (Subtarget.isLittleEndian())
7987 for (int i = 0; i < SaveElts; i++)
7988 ShuffV[i * Stride] = i;
7989 else
7990 for (int i = 1; i <= SaveElts; i++)
7991 ShuffV[i * Stride - 1] = i - 1;
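  // For example, converting v4i8 to v4f32 on a little-endian subtarget:
  // Wide is v16i8, Stride = 4 and SaveElts = 4, so source bytes {0, 1, 2, 3}
  // land in lanes {0, 4, 8, 12} and the remaining lanes come from
  // ShuffleSrc2 (zeros for unsigned sources, undef for signed ones).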
7992
7993 SDValue ShuffleSrc2 =
7994 SignedConv ? DAG.getUNDEF(WideVT) : DAG.getConstant(0, dl, WideVT);
7995 SDValue Arrange = DAG.getVectorShuffle(WideVT, dl, Wide, ShuffleSrc2, ShuffV);
7996
7997 SDValue Extend;
7998 if (SignedConv) {
7999 Arrange = DAG.getBitcast(IntermediateVT, Arrange);
8000 EVT ExtVT = Src.getValueType();
8001 if (Subtarget.hasP9Altivec())
8002 ExtVT = EVT::getVectorVT(*DAG.getContext(), WideVT.getVectorElementType(),
8003 IntermediateVT.getVectorNumElements());
8004
8005 Extend = DAG.getNode(ISD::SIGN_EXTEND_INREG, dl, IntermediateVT, Arrange,
8006 DAG.getValueType(ExtVT));
8007 } else
8008 Extend = DAG.getNode(ISD::BITCAST, dl, IntermediateVT, Arrange);
8009
8010 if (IsStrict)
8011 return DAG.getNode(Opc, dl, DAG.getVTList(Op.getValueType(), MVT::Other),
8012 {Op.getOperand(0), Extend}, Flags);
8013
8014 return DAG.getNode(Opc, dl, Op.getValueType(), Extend);
8015}
8016
8017SDValue PPCTargetLowering::LowerINT_TO_FP(SDValue Op,
8018 SelectionDAG &DAG) const {
8019 SDLoc dl(Op);
8020 bool IsSigned = Op.getOpcode() == ISD::SINT_TO_FP ||
8021 Op.getOpcode() == ISD::STRICT_SINT_TO_FP;
8022 bool IsStrict = Op->isStrictFPOpcode();
8023 SDValue Src = Op.getOperand(IsStrict ? 1 : 0);
8024 SDValue Chain = IsStrict ? Op.getOperand(0) : DAG.getEntryNode();
8025
8026 // TODO: Any other flags to propagate?
8027 SDNodeFlags Flags;
8028 Flags.setNoFPExcept(Op->getFlags().hasNoFPExcept());
8029
8030 EVT InVT = Src.getValueType();
8031 EVT OutVT = Op.getValueType();
8032 if (OutVT.isVector() && OutVT.isFloatingPoint() &&
8033 isOperationCustom(Op.getOpcode(), InVT))
8034 return LowerINT_TO_FPVector(Op, DAG, dl);
8035
8036 // Conversions to f128 are legal.
8037 if (Op.getValueType() == MVT::f128)
8038 return Subtarget.hasP9Vector() ? Op : SDValue();
8039
8040 // Don't handle ppc_fp128 here; let it be lowered to a libcall.
8041 if (Op.getValueType() != MVT::f32 && Op.getValueType() != MVT::f64)
8042 return SDValue();
8043
8044 if (Src.getValueType() == MVT::i1) {
8045 SDValue Sel = DAG.getNode(ISD::SELECT, dl, Op.getValueType(), Src,
8046 DAG.getConstantFP(1.0, dl, Op.getValueType()),
8047 DAG.getConstantFP(0.0, dl, Op.getValueType()));
8048 if (IsStrict)
8049 return DAG.getMergeValues({Sel, Chain}, dl);
8050 else
8051 return Sel;
8052 }
8053
8054  // If we have direct moves, we can do all of the conversion and skip the
8055  // store/load; however, without FPCVT we can't do most conversions.
8056 if (Subtarget.hasDirectMove() && directMoveIsProfitable(Op) &&
8057 Subtarget.isPPC64() && Subtarget.hasFPCVT())
8058 return LowerINT_TO_FPDirectMove(Op, DAG, dl);
8059
8060 assert((IsSigned || Subtarget.hasFPCVT()) &&
8061 "UINT_TO_FP is supported only with FPCVT");
8062
8063 if (Src.getValueType() == MVT::i64) {
8064 SDValue SINT = Src;
8065 // When converting to single-precision, we actually need to convert
8066 // to double-precision first and then round to single-precision.
8067 // To avoid double-rounding effects during that operation, we have
8068 // to prepare the input operand. Bits that might be truncated when
8069 // converting to double-precision are replaced by a bit that won't
8070 // be lost at this stage, but is below the single-precision rounding
8071 // position.
8072 //
8073 // However, if -enable-unsafe-fp-math is in effect, accept double
8074 // rounding to avoid the extra overhead.
8075 if (Op.getValueType() == MVT::f32 &&
8076        !Subtarget.hasFPCVT() &&
8077        !DAG.getTarget().Options.UnsafeFPMath) {
8078
8079 // Twiddle input to make sure the low 11 bits are zero. (If this
8080 // is the case, we are guaranteed the value will fit into the 53 bit
8081 // mantissa of an IEEE double-precision value without rounding.)
8082 // If any of those low 11 bits were not zero originally, make sure
8083 // bit 12 (value 2048) is set instead, so that the final rounding
8084 // to single-precision gets the correct result.
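      // Worked example: for SINT = 0x1003 (low 11 bits nonzero),
      // (0x1003 & 2047) + 2047 = 0x802; OR'ing with SINT gives 0x1803 and
      // masking with -2048 yields 0x1800, i.e. the low 11 bits are cleared
      // and bit 11 is set as the sticky replacement.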
8085 SDValue Round = DAG.getNode(ISD::AND, dl, MVT::i64,
8086 SINT, DAG.getConstant(2047, dl, MVT::i64));
8087 Round = DAG.getNode(ISD::ADD, dl, MVT::i64,
8088 Round, DAG.getConstant(2047, dl, MVT::i64));
8089 Round = DAG.getNode(ISD::OR, dl, MVT::i64, Round, SINT);
8090 Round = DAG.getNode(ISD::AND, dl, MVT::i64,
8091 Round, DAG.getConstant(-2048, dl, MVT::i64));
8092
8093 // However, we cannot use that value unconditionally: if the magnitude
8094 // of the input value is small, the bit-twiddling we did above might
8095 // end up visibly changing the output. Fortunately, in that case, we
8096 // don't need to twiddle bits since the original input will convert
8097 // exactly to double-precision floating-point already. Therefore,
8098 // construct a conditional to use the original value if the top 11
8099 // bits are all sign-bit copies, and use the rounded value computed
8100 // above otherwise.
8101      SDValue Cond = DAG.getNode(ISD::SRA, dl, MVT::i64,
8102                                 SINT, DAG.getConstant(53, dl, MVT::i32));
8103 Cond = DAG.getNode(ISD::ADD, dl, MVT::i64,
8104 Cond, DAG.getConstant(1, dl, MVT::i64));
8105 Cond = DAG.getSetCC(
8106          dl,
8107          getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), MVT::i64),
8108          Cond, DAG.getConstant(1, dl, MVT::i64), ISD::SETUGT);
8109
8110 SINT = DAG.getNode(ISD::SELECT, dl, MVT::i64, Cond, Round, SINT);
8111 }
8112
8113 ReuseLoadInfo RLI;
8114 SDValue Bits;
8115
8116    MachineFunction &MF = DAG.getMachineFunction();
8117    if (canReuseLoadAddress(SINT, MVT::i64, RLI, DAG)) {
8118 Bits = DAG.getLoad(MVT::f64, dl, RLI.Chain, RLI.Ptr, RLI.MPI,
8119 RLI.Alignment, RLI.MMOFlags(), RLI.AAInfo, RLI.Ranges);
8120 spliceIntoChain(RLI.ResChain, Bits.getValue(1), DAG);
8121 } else if (Subtarget.hasLFIWAX() &&
8122 canReuseLoadAddress(SINT, MVT::i32, RLI, DAG, ISD::SEXTLOAD)) {
8123      MachineMemOperand *MMO =
8124          MF.getMachineMemOperand(RLI.MPI, MachineMemOperand::MOLoad, 4,
8125                                  RLI.Alignment, RLI.AAInfo, RLI.Ranges);
8126      SDValue Ops[] = { RLI.Chain, RLI.Ptr };
8127      Bits = DAG.getMemIntrinsicNode(PPCISD::LFIWAX, dl,
8128                                     DAG.getVTList(MVT::f64, MVT::Other),
8129                                     Ops, MVT::i32, MMO);
8130 spliceIntoChain(RLI.ResChain, Bits.getValue(1), DAG);
8131 } else if (Subtarget.hasFPCVT() &&
8132 canReuseLoadAddress(SINT, MVT::i32, RLI, DAG, ISD::ZEXTLOAD)) {
8133      MachineMemOperand *MMO =
8134          MF.getMachineMemOperand(RLI.MPI, MachineMemOperand::MOLoad, 4,
8135                                  RLI.Alignment, RLI.AAInfo, RLI.Ranges);
8136      SDValue Ops[] = { RLI.Chain, RLI.Ptr };
8137      Bits = DAG.getMemIntrinsicNode(PPCISD::LFIWZX, dl,
8138                                     DAG.getVTList(MVT::f64, MVT::Other),
8139                                     Ops, MVT::i32, MMO);
8140 spliceIntoChain(RLI.ResChain, Bits.getValue(1), DAG);
8141 } else if (((Subtarget.hasLFIWAX() &&
8142 SINT.getOpcode() == ISD::SIGN_EXTEND) ||
8143 (Subtarget.hasFPCVT() &&
8144 SINT.getOpcode() == ISD::ZERO_EXTEND)) &&
8145 SINT.getOperand(0).getValueType() == MVT::i32) {
8146 MachineFrameInfo &MFI = MF.getFrameInfo();
8147 EVT PtrVT = getPointerTy(DAG.getDataLayout());
8148
8149 int FrameIdx = MFI.CreateStackObject(4, Align(4), false);
8150 SDValue FIdx = DAG.getFrameIndex(FrameIdx, PtrVT);
8151
8152 SDValue Store = DAG.getStore(Chain, dl, SINT.getOperand(0), FIdx,
8153                                   MachinePointerInfo::getFixedStack(
8154                                       DAG.getMachineFunction(), FrameIdx));
8155 Chain = Store;
8156
8157 assert(cast<StoreSDNode>(Store)->getMemoryVT() == MVT::i32 &&
8158 "Expected an i32 store");
8159
8160 RLI.Ptr = FIdx;
8161 RLI.Chain = Chain;
8162      RLI.MPI =
8163          MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FrameIdx);
8164      RLI.Alignment = Align(4);
8165
8166      MachineMemOperand *MMO =
8167          MF.getMachineMemOperand(RLI.MPI, MachineMemOperand::MOLoad, 4,
8168                                  RLI.Alignment, RLI.AAInfo, RLI.Ranges);
8169      SDValue Ops[] = { RLI.Chain, RLI.Ptr };
8170      Bits = DAG.getMemIntrinsicNode(SINT.getOpcode() == ISD::ZERO_EXTEND ?
8171                                         PPCISD::LFIWZX : PPCISD::LFIWAX,
8172                                     dl, DAG.getVTList(MVT::f64, MVT::Other),
8173                                     Ops, MVT::i32, MMO);
8174 Chain = Bits.getValue(1);
8175 } else
8176 Bits = DAG.getNode(ISD::BITCAST, dl, MVT::f64, SINT);
8177
8178 SDValue FP = convertIntToFP(Op, Bits, DAG, Subtarget, Chain);
8179 if (IsStrict)
8180 Chain = FP.getValue(1);
8181
8182 if (Op.getValueType() == MVT::f32 && !Subtarget.hasFPCVT()) {
8183 if (IsStrict)
8184        FP = DAG.getNode(ISD::STRICT_FP_ROUND, dl,
8185                         DAG.getVTList(MVT::f32, MVT::Other),
8186                         {Chain, FP, DAG.getIntPtrConstant(0, dl)}, Flags);
8187 else
8188 FP = DAG.getNode(ISD::FP_ROUND, dl, MVT::f32, FP,
8189 DAG.getIntPtrConstant(0, dl));
8190 }
8191 return FP;
8192 }
8193
8194 assert(Src.getValueType() == MVT::i32 &&
8195 "Unhandled INT_TO_FP type in custom expander!");
8196  // Since we only generate this in 64-bit mode, we can take advantage of
8197  // 64-bit registers. In particular, sign extend the input value into a
8198  // 64-bit register with extsw, store the WHOLE 64-bit value to the stack,
8199  // then lfd it and fcfid it.
8200  MachineFunction &MF = DAG.getMachineFunction();
8201  MachineFrameInfo &MFI = MF.getFrameInfo();
8202 EVT PtrVT = getPointerTy(MF.getDataLayout());
8203
8204 SDValue Ld;
8205 if (Subtarget.hasLFIWAX() || Subtarget.hasFPCVT()) {
8206 ReuseLoadInfo RLI;
8207 bool ReusingLoad;
8208 if (!(ReusingLoad = canReuseLoadAddress(Src, MVT::i32, RLI, DAG))) {
8209 int FrameIdx = MFI.CreateStackObject(4, Align(4), false);
8210 SDValue FIdx = DAG.getFrameIndex(FrameIdx, PtrVT);
8211
8212 SDValue Store = DAG.getStore(Chain, dl, Src, FIdx,
8213                                   MachinePointerInfo::getFixedStack(
8214                                       DAG.getMachineFunction(), FrameIdx));
8215 Chain = Store;
8216
8217 assert(cast<StoreSDNode>(Store)->getMemoryVT() == MVT::i32 &&
8218 "Expected an i32 store");
8219
8220 RLI.Ptr = FIdx;
8221 RLI.Chain = Chain;
8222      RLI.MPI =
8223          MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FrameIdx);
8224      RLI.Alignment = Align(4);
8225 }
8226
8227 MachineMemOperand *MMO =
8228        MF.getMachineMemOperand(RLI.MPI, MachineMemOperand::MOLoad, 4,
8229                                RLI.Alignment, RLI.AAInfo, RLI.Ranges);
8230 SDValue Ops[] = { RLI.Chain, RLI.Ptr };
8231 Ld = DAG.getMemIntrinsicNode(IsSigned ? PPCISD::LFIWAX : PPCISD::LFIWZX, dl,
8232                                 DAG.getVTList(MVT::f64, MVT::Other), Ops,
8233                                 MVT::i32, MMO);
8234 Chain = Ld.getValue(1);
8235 if (ReusingLoad)
8236 spliceIntoChain(RLI.ResChain, Ld.getValue(1), DAG);
8237 } else {
8238 assert(Subtarget.isPPC64() &&
8239 "i32->FP without LFIWAX supported only on PPC64");
8240
8241 int FrameIdx = MFI.CreateStackObject(8, Align(8), false);
8242 SDValue FIdx = DAG.getFrameIndex(FrameIdx, PtrVT);
8243
8244 SDValue Ext64 = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::i64, Src);
8245
8246 // STD the extended value into the stack slot.
8247 SDValue Store = DAG.getStore(
8248        Chain, dl, Ext64, FIdx,
8249        MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FrameIdx));
8250    Chain = Store;
8251
8252 // Load the value as a double.
8253 Ld = DAG.getLoad(
8254        MVT::f64, dl, Chain, FIdx,
8255        MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FrameIdx));
8256    Chain = Ld.getValue(1);
8257 }
8258
8259 // FCFID it and return it.
8260 SDValue FP = convertIntToFP(Op, Ld, DAG, Subtarget, Chain);
8261 if (IsStrict)
8262 Chain = FP.getValue(1);
8263 if (Op.getValueType() == MVT::f32 && !Subtarget.hasFPCVT()) {
8264 if (IsStrict)
8265      FP = DAG.getNode(ISD::STRICT_FP_ROUND, dl,
8266                       DAG.getVTList(MVT::f32, MVT::Other),
8267                       {Chain, FP, DAG.getIntPtrConstant(0, dl)}, Flags);
8268 else
8269 FP = DAG.getNode(ISD::FP_ROUND, dl, MVT::f32, FP,
8270 DAG.getIntPtrConstant(0, dl));
8271 }
8272 return FP;
8273}
8274
8275SDValue PPCTargetLowering::LowerFLT_ROUNDS_(SDValue Op,
8276 SelectionDAG &DAG) const {
8277 SDLoc dl(Op);
8278 /*
8279 The rounding mode is in bits 30:31 of FPSCR, and has the following
8280 settings:
8281 00 Round to nearest
8282 01 Round to 0
8283 10 Round to +inf
8284 11 Round to -inf
8285
8286 FLT_ROUNDS, on the other hand, expects the following:
8287 -1 Undefined
8288 0 Round to 0
8289 1 Round to nearest
8290 2 Round to +inf
8291 3 Round to -inf
8292
8293 To perform the conversion, we do:
8294 ((FPSCR & 0x3) ^ ((~FPSCR & 0x3) >> 1))
8295 */
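  // Checking the formula against all four FPSCR settings:
  //   RN=00: (0&3) ^ ((~0&3)>>1) = 0 ^ 1 = 1  (round to nearest)
  //   RN=01: (1&3) ^ ((~1&3)>>1) = 1 ^ 1 = 0  (round toward zero)
  //   RN=10: (2&3) ^ ((~2&3)>>1) = 2 ^ 0 = 2  (round to +inf)
  //   RN=11: (3&3) ^ ((~3&3)>>1) = 3 ^ 0 = 3  (round to -inf)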
8296
8297  MachineFunction &MF = DAG.getMachineFunction();
8298  EVT VT = Op.getValueType();
8299 EVT PtrVT = getPointerTy(MF.getDataLayout());
8300
8301 // Save FP Control Word to register
8302 SDValue Chain = Op.getOperand(0);
8303 SDValue MFFS = DAG.getNode(PPCISD::MFFS, dl, {MVT::f64, MVT::Other}, Chain);
8304 Chain = MFFS.getValue(1);
8305
8306 SDValue CWD;
8307 if (isTypeLegal(MVT::i64)) {
8308 CWD = DAG.getNode(ISD::TRUNCATE, dl, MVT::i32,
8309 DAG.getNode(ISD::BITCAST, dl, MVT::i64, MFFS));
8310 } else {
8311 // Save FP register to stack slot
8312 int SSFI = MF.getFrameInfo().CreateStackObject(8, Align(8), false);
8313 SDValue StackSlot = DAG.getFrameIndex(SSFI, PtrVT);
8314 Chain = DAG.getStore(Chain, dl, MFFS, StackSlot, MachinePointerInfo());
8315
8316 // Load FP Control Word from low 32 bits of stack slot.
8318 "Stack slot adjustment is valid only on big endian subtargets!");
8319 SDValue Four = DAG.getConstant(4, dl, PtrVT);
8320 SDValue Addr = DAG.getNode(ISD::ADD, dl, PtrVT, StackSlot, Four);
8321 CWD = DAG.getLoad(MVT::i32, dl, Chain, Addr, MachinePointerInfo());
8322 Chain = CWD.getValue(1);
8323 }
8324
8325 // Transform as necessary
8326 SDValue CWD1 =
8327 DAG.getNode(ISD::AND, dl, MVT::i32,
8328 CWD, DAG.getConstant(3, dl, MVT::i32));
8329 SDValue CWD2 =
8330 DAG.getNode(ISD::SRL, dl, MVT::i32,
8331 DAG.getNode(ISD::AND, dl, MVT::i32,
8332 DAG.getNode(ISD::XOR, dl, MVT::i32,
8333 CWD, DAG.getConstant(3, dl, MVT::i32)),
8334 DAG.getConstant(3, dl, MVT::i32)),
8335 DAG.getConstant(1, dl, MVT::i32));
8336
8337 SDValue RetVal =
8338 DAG.getNode(ISD::XOR, dl, MVT::i32, CWD1, CWD2);
8339
8340  RetVal =
8341      DAG.getNode((VT.getSizeInBits() < 16 ? ISD::TRUNCATE : ISD::SIGN_EXTEND),
8342                  dl, VT, RetVal);
8343
8344 return DAG.getMergeValues({RetVal, Chain}, dl);
8345}
8346
8347SDValue PPCTargetLowering::LowerSHL_PARTS(SDValue Op, SelectionDAG &DAG) const {
8348 EVT VT = Op.getValueType();
8349 unsigned BitWidth = VT.getSizeInBits();
8350 SDLoc dl(Op);
8351 assert(Op.getNumOperands() == 3 &&
8352 VT == Op.getOperand(1).getValueType() &&
8353 "Unexpected SHL!");
8354
8355 // Expand into a bunch of logical ops. Note that these ops
8356 // depend on the PPC behavior for oversized shift amounts.
8357 SDValue Lo = Op.getOperand(0);
8358 SDValue Hi = Op.getOperand(1);
8359 SDValue Amt = Op.getOperand(2);
8360 EVT AmtVT = Amt.getValueType();
8361
8362 SDValue Tmp1 = DAG.getNode(ISD::SUB, dl, AmtVT,
8363 DAG.getConstant(BitWidth, dl, AmtVT), Amt);
8364 SDValue Tmp2 = DAG.getNode(PPCISD::SHL, dl, VT, Hi, Amt);
8365 SDValue Tmp3 = DAG.getNode(PPCISD::SRL, dl, VT, Lo, Tmp1);
8366 SDValue Tmp4 = DAG.getNode(ISD::OR , dl, VT, Tmp2, Tmp3);
8367 SDValue Tmp5 = DAG.getNode(ISD::ADD, dl, AmtVT, Amt,
8368 DAG.getConstant(-BitWidth, dl, AmtVT));
8369 SDValue Tmp6 = DAG.getNode(PPCISD::SHL, dl, VT, Lo, Tmp5);
8370 SDValue OutHi = DAG.getNode(ISD::OR, dl, VT, Tmp4, Tmp6);
8371 SDValue OutLo = DAG.getNode(PPCISD::SHL, dl, VT, Lo, Amt);
8372 SDValue OutOps[] = { OutLo, OutHi };
8373 return DAG.getMergeValues(OutOps, dl);
8374}
8375
8376SDValue PPCTargetLowering::LowerSRL_PARTS(SDValue Op, SelectionDAG &DAG) const {
8377 EVT VT = Op.getValueType();
8378 SDLoc dl(Op);
8379 unsigned BitWidth = VT.getSizeInBits();
8380 assert(Op.getNumOperands() == 3 &&
8381 VT == Op.getOperand(1).getValueType() &&
8382 "Unexpected SRL!");
8383
8384 // Expand into a bunch of logical ops. Note that these ops
8385 // depend on the PPC behavior for oversized shift amounts.
8386 SDValue Lo = Op.getOperand(0);
8387 SDValue Hi = Op.getOperand(1);
8388 SDValue Amt = Op.getOperand(2);
8389 EVT AmtVT = Amt.getValueType();
8390
8391 SDValue Tmp1 = DAG.getNode(ISD::SUB, dl, AmtVT,
8392 DAG.getConstant(BitWidth, dl, AmtVT), Amt);
8393 SDValue Tmp2 = DAG.getNode(PPCISD::SRL, dl, VT, Lo, Amt);
8394 SDValue Tmp3 = DAG.getNode(PPCISD::SHL, dl, VT, Hi, Tmp1);
8395 SDValue Tmp4 = DAG.getNode(ISD::OR, dl, VT, Tmp2, Tmp3);
8396 SDValue Tmp5 = DAG.getNode(ISD::ADD, dl, AmtVT, Amt,
8397 DAG.getConstant(-BitWidth, dl, AmtVT));
8398 SDValue Tmp6 = DAG.getNode(PPCISD::SRL, dl, VT, Hi, Tmp5);
8399 SDValue OutLo = DAG.getNode(ISD::OR, dl, VT, Tmp4, Tmp6);
8400 SDValue OutHi = DAG.getNode(PPCISD::SRL, dl, VT, Hi, Amt);
8401 SDValue OutOps[] = { OutLo, OutHi };
8402 return DAG.getMergeValues(OutOps, dl);
8403}
8404
8405SDValue PPCTargetLowering::LowerSRA_PARTS(SDValue Op, SelectionDAG &DAG) const {
8406 SDLoc dl(Op);
8407 EVT VT = Op.getValueType();
8408 unsigned BitWidth = VT.getSizeInBits();
8409 assert(Op.getNumOperands() == 3 &&
8410 VT == Op.getOperand(1).getValueType() &&
8411 "Unexpected SRA!");
8412
8413 // Expand into a bunch of logical ops, followed by a select_cc.
8414 SDValue Lo = Op.getOperand(0);
8415 SDValue Hi = Op.getOperand(1);
8416 SDValue Amt = Op.getOperand(2);
8417 EVT AmtVT = Amt.getValueType();
8418
8419 SDValue Tmp1 = DAG.getNode(ISD::SUB, dl, AmtVT,
8420 DAG.getConstant(BitWidth, dl, AmtVT), Amt);
8421 SDValue Tmp2 = DAG.getNode(PPCISD::SRL, dl, VT, Lo, Amt);
8422 SDValue Tmp3 = DAG.getNode(PPCISD::SHL, dl, VT, Hi, Tmp1);
8423 SDValue Tmp4 = DAG.getNode(ISD::OR, dl, VT, Tmp2, Tmp3);
8424 SDValue Tmp5 = DAG.getNode(ISD::ADD, dl, AmtVT, Amt,
8425 DAG.getConstant(-BitWidth, dl, AmtVT));
8426 SDValue Tmp6 = DAG.getNode(PPCISD::SRA, dl, VT, Hi, Tmp5);
8427 SDValue OutHi = DAG.getNode(PPCISD::SRA, dl, VT, Hi, Amt);
8428 SDValue OutLo = DAG.getSelectCC(dl, Tmp5, DAG.getConstant(0, dl, AmtVT),
8429 Tmp4, Tmp6, ISD::SETLE);
8430 SDValue OutOps[] = { OutLo, OutHi };
8431 return DAG.getMergeValues(OutOps, dl);
8432}
8433
8434SDValue PPCTargetLowering::LowerFunnelShift(SDValue Op,
8435 SelectionDAG &DAG) const {
8436 SDLoc dl(Op);
8437 EVT VT = Op.getValueType();
8438 unsigned BitWidth = VT.getSizeInBits();
8439
8440 bool IsFSHL = Op.getOpcode() == ISD::FSHL;
8441 SDValue X = Op.getOperand(0);
8442 SDValue Y = Op.getOperand(1);
8443 SDValue Z = Op.getOperand(2);
8444 EVT AmtVT = Z.getValueType();
8445
8446 // fshl: (X << (Z % BW)) | (Y >> (BW - (Z % BW)))
8447 // fshr: (X << (BW - (Z % BW))) | (Y >> (Z % BW))
8448 // This is simpler than TargetLowering::expandFunnelShift because we can rely
8449 // on PowerPC shift by BW being well defined.
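// For example, for an i64 fshl(X, Y, Z) with Z & 63 == 8: SubZ = 56 and the
// result is (X << 8) | (Y >> 56). When Z & 63 == 0, SubZ = 64 and the PPC
// shift by 64 yields 0, so the result is simply X, as required.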
8450 Z = DAG.getNode(ISD::AND, dl, AmtVT, Z,
8451 DAG.getConstant(BitWidth - 1, dl, AmtVT));
8452 SDValue SubZ =
8453 DAG.getNode(ISD::SUB, dl, AmtVT, DAG.getConstant(BitWidth, dl, AmtVT), Z);
8454 X = DAG.getNode(PPCISD::SHL, dl, VT, X, IsFSHL ? Z : SubZ);
8455 Y = DAG.getNode(PPCISD::SRL, dl, VT, Y, IsFSHL ? SubZ : Z);
8456 return DAG.getNode(ISD::OR, dl, VT, X, Y);
8457}
8458
8459//===----------------------------------------------------------------------===//
8460// Vector related lowering.
8461//
8462
8463/// getCanonicalConstSplat - Build a canonical splat immediate of Val with an
8464/// element size of SplatSize. Cast the result to VT.
8465static SDValue getCanonicalConstSplat(uint64_t Val, unsigned SplatSize, EVT VT,
8466 SelectionDAG &DAG, const SDLoc &dl) {
8467 static const MVT VTys[] = { // canonical VT to use for each size.
8468 MVT::v16i8, MVT::v8i16, MVT::Other, MVT::v4i32
8469 };
8470
8471 EVT ReqVT = VT != MVT::Other ? VT : VTys[SplatSize-1];
8472
8473 // For a splat with all ones, turn it to vspltisb 0xFF to canonicalize.
8474 if (Val == ((1LLU << (SplatSize * 8)) - 1)) {
8475 SplatSize = 1;
8476 Val = 0xFF;
8477 }
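// For example, a v8i16 splat of 0xFFFF becomes a 1-byte splat of 0xFF, so a
// single vspltisb -1 can materialize it regardless of the element width.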
8478
8479 EVT CanonicalVT = VTys[SplatSize-1];
8480
8481 // Build a canonical splat for this value.
8482 return DAG.getBitcast(ReqVT, DAG.getConstant(Val, dl, CanonicalVT));
8483}
8484
8485/// BuildIntrinsicOp - Return a unary operator intrinsic node with the
8486/// specified intrinsic ID.
8487static SDValue BuildIntrinsicOp(unsigned IID, SDValue Op, SelectionDAG &DAG,
8488 const SDLoc &dl, EVT DestVT = MVT::Other) {
8489 if (DestVT == MVT::Other) DestVT = Op.getValueType();
8490 return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, DestVT,
8491 DAG.getConstant(IID, dl, MVT::i32), Op);
8492}
8493
8494/// BuildIntrinsicOp - Return a binary operator intrinsic node with the
8495/// specified intrinsic ID.
8496static SDValue BuildIntrinsicOp(unsigned IID, SDValue LHS, SDValue RHS,
8497 SelectionDAG &DAG, const SDLoc &dl,
8498 EVT DestVT = MVT::Other) {
8499 if (DestVT == MVT::Other) DestVT = LHS.getValueType();
8500 return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, DestVT,
8501 DAG.getConstant(IID, dl, MVT::i32), LHS, RHS);
8502}
8503
8504/// BuildIntrinsicOp - Return a ternary operator intrinsic node with the
8505/// specified intrinsic ID.
8506static SDValue BuildIntrinsicOp(unsigned IID, SDValue Op0, SDValue Op1,
8507 SDValue Op2, SelectionDAG &DAG, const SDLoc &dl,
8508 EVT DestVT = MVT::Other) {
8509 if (DestVT == MVT::Other) DestVT = Op0.getValueType();
8510 return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, DestVT,
8511 DAG.getConstant(IID, dl, MVT::i32), Op0, Op1, Op2);
8512}
8513
8514/// BuildVSLDOI - Return a VECTOR_SHUFFLE that is a vsldoi of the specified
8515/// amount. The result has the specified value type.
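/// For example, Amt = 4 selects bytes <4..19> of the concatenated LHS:RHS
/// pair.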
8516static SDValue BuildVSLDOI(SDValue LHS, SDValue RHS, unsigned Amt, EVT VT,
8517 SelectionDAG &DAG, const SDLoc &dl) {
8518 // Force LHS/RHS to be the right type.
8519 LHS = DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, LHS);
8520 RHS = DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, RHS);
8521
8522 int Ops[16];
8523 for (unsigned i = 0; i != 16; ++i)
8524 Ops[i] = i + Amt;
8525 SDValue T = DAG.getVectorShuffle(MVT::v16i8, dl, LHS, RHS, Ops);
8526 return DAG.getNode(ISD::BITCAST, dl, VT, T);
8527}
8528
8529/// Do we have an efficient pattern in a .td file for this node?
8530///
8531/// \param V - pointer to the BuildVectorSDNode being matched
8532/// \param HasDirectMove - does this subtarget have VSR <-> GPR direct moves?
8533///
8534/// There are some patterns where it is beneficial to keep a BUILD_VECTOR
8535/// node as a BUILD_VECTOR node rather than expanding it. The patterns where
8536/// the opposite is true (expansion is beneficial) are:
8537/// - The node builds a vector out of integers that are not 32 or 64 bits wide
8538/// - The node builds a vector out of constants
8539/// - The node is a "load-and-splat"
8540/// In all other cases, we will choose to keep the BUILD_VECTOR.
8541static bool haveEfficientBuildVectorPattern(BuildVectorSDNode *V,
8542 bool HasDirectMove,
8543 bool HasP8Vector) {
8544 EVT VecVT = V->getValueType(0);
8545 bool RightType = VecVT == MVT::v2f64 ||
8546 (HasP8Vector && VecVT == MVT::v4f32) ||
8547 (HasDirectMove && (VecVT == MVT::v2i64 || VecVT == MVT::v4i32));
8548 if (!RightType)
8549 return false;
8550
8551 bool IsSplat = true;
8552 bool IsLoad = false;
8553 SDValue Op0 = V->getOperand(0);
8554
8555 // This function is called in a block that confirms the node is not a constant
8556 // splat. So a constant BUILD_VECTOR here means the vector is built out of
8557 // different constants.
8558 if (V->isConstant())
8559 return false;
8560 for (int i = 0, e = V->getNumOperands(); i < e; ++i) {
8561 if (V->getOperand(i).isUndef())
8562 return false;
8563 // We want to expand nodes that represent load-and-splat even if the
8564 // loaded value is a floating point truncation or conversion to int.
8565 if (V->getOperand(i).getOpcode() == ISD::LOAD ||
8566 (V->getOperand(i).getOpcode() == ISD::FP_ROUND &&
8567 V->getOperand(i).getOperand(0).getOpcode() == ISD::LOAD) ||
8568 (V->getOperand(i).getOpcode() == ISD::FP_TO_SINT &&
8569 V->getOperand(i).getOperand(0).getOpcode() == ISD::LOAD) ||
8570 (V->getOperand(i).getOpcode() == ISD::FP_TO_UINT &&
8571 V->getOperand(i).getOperand(0).getOpcode() == ISD::LOAD))
8572 IsLoad = true;
8573 // If the operands are different or the input is not a load and has more
8574 // uses than just this BV node, then it isn't a splat.
8575 if (V->getOperand(i) != Op0 ||
8576 (!IsLoad && !V->isOnlyUserOf(V->getOperand(i).getNode())))
8577 IsSplat = false;
8578 }
8579 return !(IsSplat && IsLoad);
8580}
8581
8582// Lower BITCAST(f128, (build_pair i64, i64)) to BUILD_FP128.
8583SDValue PPCTargetLowering::LowerBITCAST(SDValue Op, SelectionDAG &DAG) const {
8584
8585 SDLoc dl(Op);
8586 SDValue Op0 = Op->getOperand(0);
8587
8588 if ((Op.getValueType() != MVT::f128) ||
8589 (Op0.getOpcode() != ISD::BUILD_PAIR) ||
8590 (Op0.getOperand(0).getValueType() != MVT::i64) ||
8591 (Op0.getOperand(1).getValueType() != MVT::i64))
8592 return SDValue();
8593
8594 return DAG.getNode(PPCISD::BUILD_FP128, dl, MVT::f128, Op0.getOperand(0),
8595 Op0.getOperand(1));
8596}
8597
8598static const SDValue *getNormalLoadInput(const SDValue &Op, bool &IsPermuted) {
8599 const SDValue *InputLoad = &Op;
8600 if (InputLoad->getOpcode() == ISD::BITCAST)
8601 InputLoad = &InputLoad->getOperand(0);
8602 if (InputLoad->getOpcode() == ISD::SCALAR_TO_VECTOR ||
8603 InputLoad->getOpcode() == PPCISD::SCALAR_TO_VECTOR_PERMUTED) {
8604 IsPermuted = InputLoad->getOpcode() == PPCISD::SCALAR_TO_VECTOR_PERMUTED;
8605 InputLoad = &InputLoad->getOperand(0);
8606 }
8607 if (InputLoad->getOpcode() != ISD::LOAD)
8608 return nullptr;
8609 LoadSDNode *LD = cast<LoadSDNode>(*InputLoad);
8610 return ISD::isNormalLoad(LD) ? InputLoad : nullptr;
8611}
8612
8613// Convert the argument APFloat to single precision if the conversion loses no
8614// information and the resulting number is not a denormal number. Return true
8615// if successful.
8616bool llvm::convertToNonDenormSingle(APFloat &ArgAPFloat) {
8617 APFloat APFloatToConvert = ArgAPFloat;
8618 bool LosesInfo = true;
8619 APFloatToConvert.convert(APFloat::IEEEsingle(), APFloat::rmNearestTiesToEven,
8620 &LosesInfo);
8621 bool Success = (!LosesInfo && !APFloatToConvert.isDenormal());
8622 if (Success)
8623 ArgAPFloat = APFloatToConvert;
8624 return Success;
8625}
8626
8627// Bitcast the argument APInt to a double and convert it to single precision.
8628// If the conversion from double to single precision loses no information and
8629// the result is not a denormal number, bitcast the single precision value
8630// back to an APInt and assign it to the original argument. Return true if
8631// successful.
8632bool llvm::convertToNonDenormSingle(APInt &ArgAPInt) {
8633 double DpValue = ArgAPInt.bitsToDouble();
8634 APFloat APFloatDp(DpValue);
8635 bool Success = convertToNonDenormSingle(APFloatDp);
8636 if (Success)
8637 ArgAPInt = APFloatDp.bitcastToAPInt();
8638 return Success;
8639}
8640
8641// If this is a case we can't handle, return null and let the default
8642// expansion code take care of it. If we CAN select this case, and if it
8643// selects to a single instruction, return Op. Otherwise, if we can codegen
8644// this case more efficiently than a constant pool load, lower it to the
8645// sequence of ops that should be used.
8646SDValue PPCTargetLowering::LowerBUILD_VECTOR(SDValue Op,
8647 SelectionDAG &DAG) const {
8648 SDLoc dl(Op);
8649 BuildVectorSDNode *BVN = dyn_cast<BuildVectorSDNode>(Op.getNode());
8650 assert(BVN && "Expected a BuildVectorSDNode in LowerBUILD_VECTOR");
8651
8652 // Check if this is a splat of a constant value.
8653 APInt APSplatBits, APSplatUndef;
8654 unsigned SplatBitSize;
8655 bool HasAnyUndefs;
8656 bool BVNIsConstantSplat =
8657 BVN->isConstantSplat(APSplatBits, APSplatUndef, SplatBitSize,
8658 HasAnyUndefs, 0, !Subtarget.isLittleEndian());
8659
8660 // If it is a splat of a double, check if we can shrink it to a 32 bit
8661 // non-denormal float which when converted back to double gives us the same
8662 // double. This is to exploit the XXSPLTIDP instruction.
8663 // If we lose precision, we use XXSPLTI32DX.
8664 if (BVNIsConstantSplat && (SplatBitSize == 64) &&
8665 Subtarget.hasPrefixInstrs()) {
8666 // Check the type first to short-circuit so we don't modify APSplatBits if
8667 // this block isn't executed.
8668 if ((Op->getValueType(0) == MVT::v2f64) &&
8669 convertToNonDenormSingle(APSplatBits)) {
8670 SDValue SplatNode = DAG.getNode(
8671 PPCISD::XXSPLTI_SP_TO_DP, dl, MVT::v2f64,
8672 DAG.getTargetConstant(APSplatBits.getZExtValue(), dl, MVT::i32));
8673 return DAG.getBitcast(Op.getValueType(), SplatNode);
8674 } else {
8675 // We may lose precision, so we have to use XXSPLTI32DX.
8676
8677 uint32_t Hi =
8678 (uint32_t)((APSplatBits.getZExtValue() & 0xFFFFFFFF00000000LL) >> 32);
8679 uint32_t Lo =
8680 (uint32_t)(APSplatBits.getZExtValue() & 0xFFFFFFFF);
8681 SDValue SplatNode = DAG.getUNDEF(MVT::v2i64);
8682
8683 if (!Hi || !Lo)
8684 // If either 32-bit half is 0, generate XXLXOR to set the register to 0.
8685 SplatNode = DAG.getTargetConstant(0, dl, MVT::v2i64);
8686
8687 if (Hi)
8688 SplatNode = DAG.getNode(
8689 PPCISD::XXSPLTI32DX, dl, MVT::v2i64, SplatNode,
8690 DAG.getTargetConstant(0, dl, MVT::i32),
8691 DAG.getTargetConstant(Hi, dl, MVT::i32));
8692
8693 if (Lo)
8694 SplatNode =
8695 DAG.getNode(PPCISD::XXSPLTI32DX, dl, MVT::v2i64, SplatNode,
8696 DAG.getTargetConstant(1, dl, MVT::i32),
8697 DAG.getTargetConstant(Lo, dl, MVT::i32));
8698
8699 return DAG.getBitcast(Op.getValueType(), SplatNode);
8700 }
8701 }
8702
8703 if (!BVNIsConstantSplat || SplatBitSize > 32) {
8704
8705 bool IsPermutedLoad = false;
8706 const SDValue *InputLoad =
8707 getNormalLoadInput(Op.getOperand(0), IsPermutedLoad);
8708 // Handle load-and-splat patterns as we have instructions that will do this
8709 // in one go.
8710 if (InputLoad && DAG.isSplatValue(Op, true)) {
8711 LoadSDNode *LD = cast<LoadSDNode>(*InputLoad);
8712
8713 // We have handling for 4 and 8 byte elements.
8714 unsigned ElementSize = LD->getMemoryVT().getScalarSizeInBits();
8715
8716 // Checking for a single use of this load, we have to check for vector
8717 // width (128 bits) / ElementSize uses (since each operand of the
8718 // BUILD_VECTOR is a separate use of the value).
8719 unsigned NumUsesOfInputLD = 128 / ElementSize;
8720 for (SDValue BVInOp : Op->ops())
8721 if (BVInOp.isUndef())
8722 NumUsesOfInputLD--;
8723 assert(NumUsesOfInputLD > 0 && "No uses of input LD of a build_vector?");
8724 if (InputLoad->getNode()->hasNUsesOfValue(NumUsesOfInputLD, 0) &&
8725 ((Subtarget.hasVSX() && ElementSize == 64) ||
8726 (Subtarget.hasP9Vector() && ElementSize == 32))) {
8727 SDValue Ops[] = {
8728 LD->getChain(), // Chain
8729 LD->getBasePtr(), // Ptr
8730 DAG.getValueType(Op.getValueType()) // VT
8731 };
8732 SDValue LdSplt = DAG.getMemIntrinsicNode(
8733 PPCISD::LD_SPLAT, dl, DAG.getVTList(Op.getValueType(), MVT::Other),
8734 Ops, LD->getMemoryVT(), LD->getMemOperand());
8735 // Replace all uses of the output chain of the original load with the
8736 // output chain of the new load.
8737 DAG.ReplaceAllUsesOfValueWith(InputLoad->getValue(1),
8738 LdSplt.getValue(1));
8739 return LdSplt;
8740 }
8741 }
8742
8743 // In 64-bit mode, BUILD_VECTOR nodes that are not constant splats of up to
8744 // 32 bits can be lowered to VSX instructions under certain conditions.
8745 // Without VSX, there is no pattern more efficient than expanding the node.
8746 if (Subtarget.hasVSX() && Subtarget.isPPC64() &&
8747 haveEfficientBuildVectorPattern(BVN, Subtarget.hasDirectMove(),
8748 Subtarget.hasP8Vector()))
8749 return Op;
8750 return SDValue();
8751 }
8752
8753 uint64_t SplatBits = APSplatBits.getZExtValue();
8754 uint64_t SplatUndef = APSplatUndef.getZExtValue();
8755 unsigned SplatSize = SplatBitSize / 8;
8756
8757 // First, handle single instruction cases.
8758
8759 // All zeros?
8760 if (SplatBits == 0) {
8761 // Canonicalize all zero vectors to be v4i32.
8762 if (Op.getValueType() != MVT::v4i32 || HasAnyUndefs) {
8763 SDValue Z = DAG.getConstant(0, dl, MVT::v4i32);
8764 Op = DAG.getNode(ISD::BITCAST, dl, Op.getValueType(), Z);
8765 }
8766 return Op;
8767 }
8768
8769 // We have XXSPLTIW for constant splats four bytes wide.
8770 // Since the vector length is a multiple of 4, 2-byte splats can be replaced
8771 // with 4-byte splats: we replicate the SplatBits of a 2-byte splat to make
8772 // a 4-byte splat element. For example, a 2-byte splat of 0xABAB can be
8773 // turned into a 4-byte splat of 0xABABABAB.
8774 if (Subtarget.hasPrefixInstrs() && SplatSize == 2)
8775 return getCanonicalConstSplat(SplatBits | (SplatBits << 16), SplatSize * 2,
8776 Op.getValueType(), DAG, dl);
8777
8778 if (Subtarget.hasPrefixInstrs() && SplatSize == 4)
8779 return getCanonicalConstSplat(SplatBits, SplatSize, Op.getValueType(), DAG,
8780 dl);
8781
8782 // We have XXSPLTIB for constant splats one byte wide.
8783 if (Subtarget.hasP9Vector() && SplatSize == 1)
8784 return getCanonicalConstSplat(SplatBits, SplatSize, Op.getValueType(), DAG,
8785 dl);
8786
8787 // If the sign extended value is in the range [-16,15], use VSPLTI[bhw].
8788 int32_t SextVal= (int32_t(SplatBits << (32-SplatBitSize)) >>
8789 (32-SplatBitSize));
8790 if (SextVal >= -16 && SextVal <= 15)
8791 return getCanonicalConstSplat(SextVal, SplatSize, Op.getValueType(), DAG,
8792 dl);
8793
8794 // Two instruction sequences.
8795
8796 // If this value is in the range [-32,30] and is even, use:
8797 // VSPLTI[bhw](val/2) + VSPLTI[bhw](val/2)
8798 // If this value is in the range [17,31] and is odd, use:
8799 // VSPLTI[bhw](val-16) - VSPLTI[bhw](-16)
8800 // If this value is in the range [-31,-17] and is odd, use:
8801 // VSPLTI[bhw](val+16) + VSPLTI[bhw](-16)
8802 // Note the last two are three-instruction sequences.
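// For example, a splat of 28 can be formed as vspltisw(14) added to itself,
// and a splat of 27 as vspltisw(11) minus vspltisw(-16).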
8803 if (SextVal >= -32 && SextVal <= 31) {
8804 // To avoid having these optimizations undone by constant folding,
8805 // we convert to a pseudo that will be expanded later into one of
8806 // the above forms.
8807 SDValue Elt = DAG.getConstant(SextVal, dl, MVT::i32);
8808 EVT VT = (SplatSize == 1 ? MVT::v16i8 :
8809 (SplatSize == 2 ? MVT::v8i16 : MVT::v4i32));
8810 SDValue EltSize = DAG.getConstant(SplatSize, dl, MVT::i32);
8811 SDValue RetVal = DAG.getNode(PPCISD::VADD_SPLAT, dl, VT, Elt, EltSize);
8812 if (VT == Op.getValueType())
8813 return RetVal;
8814 else
8815 return DAG.getNode(ISD::BITCAST, dl, Op.getValueType(), RetVal);
8816 }
8817
8818 // If this is 0x8000_0000 x 4, turn into vspltisw + vslw. If it is
8819 // 0x7FFF_FFFF x 4, turn it into not(0x8000_0000). This is important
8820 // for fneg/fabs.
8821 if (SplatSize == 4 && SplatBits == (0x7FFFFFFF&~SplatUndef)) {
8822 // Make -1 and vspltisw -1:
8823 SDValue OnesV = getCanonicalConstSplat(-1, 4, MVT::v4i32, DAG, dl);
8824
8825 // Make the VSLW intrinsic, computing 0x8000_0000.
8826 SDValue Res = BuildIntrinsicOp(Intrinsic::ppc_altivec_vslw, OnesV,
8827 OnesV, DAG, dl);
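// vslw shifts each word left by the low 5 bits of the corresponding element
// of the shift operand; with OnesV (all words 0xFFFFFFFF) as both operands,
// each word becomes 0xFFFFFFFF << 31 = 0x8000_0000.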
8828
8829 // xor by OnesV to invert it.
8830 Res = DAG.getNode(ISD::XOR, dl, MVT::v4i32, Res, OnesV);
8831 return DAG.getNode(ISD::BITCAST, dl, Op.getValueType(), Res);
8832 }
8833
8834 // Check to see if this is a wide variety of vsplti*, binop self cases.
8835 static const signed char SplatCsts[] = {
8836 -1, 1, -2, 2, -3, 3, -4, 4, -5, 5, -6, 6, -7, 7,
8837 -8, 8, -9, 9, -10, 10, -11, 11, -12, 12, -13, 13, 14, -14, 15, -15, -16
8838 };
8839
8840 for (unsigned idx = 0; idx < array_lengthof(SplatCsts); ++idx) {
8841 // Indirect through the SplatCsts array so that we favor 'vsplti -1' for
8842 // cases which are ambiguous (e.g. formation of 0x8000_0000): -1 is listed first.
8843 int i = SplatCsts[idx];
8844
8845 // Figure out what shift amount will be used by altivec if shifted by i in
8846 // this splat size.
8847 unsigned TypeShiftAmt = i & (SplatBitSize-1);
8848
8849 // vsplti + shl self.
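// (E.g. for a v8i16 splat of 0xF800: i = -8 gives TypeShiftAmt = 8, and
// vspltish(-8) followed by vslh by 8 produces 0xF800 in every element.)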
8850 if (SextVal == (int)((unsigned)i << TypeShiftAmt)) {
8851 SDValue Res = getCanonicalConstSplat(i, SplatSize, MVT::Other, DAG, dl);
8852 static const unsigned IIDs[] = { // Intrinsic to use for each size.
8853 Intrinsic::ppc_altivec_vslb, Intrinsic::ppc_altivec_vslh, 0,
8854 Intrinsic::ppc_altivec_vslw
8855 };
8856 Res = BuildIntrinsicOp(IIDs[SplatSize-1], Res, Res, DAG, dl);
8857 return DAG.getNode(ISD::BITCAST, dl, Op.getValueType(), Res);
8858 }
8859
8860 // vsplti + srl self.
8861 if (SextVal == (int)((unsigned)i >> TypeShiftAmt)) {
8862 SDValue Res = getCanonicalConstSplat(i, SplatSize, MVT::Other, DAG, dl);
8863 static const unsigned IIDs[] = { // Intrinsic to use for each size.
8864 Intrinsic::ppc_altivec_vsrb, Intrinsic::ppc_altivec_vsrh, 0,
8865 Intrinsic::ppc_altivec_vsrw
8866 };
8867 Res = BuildIntrinsicOp(IIDs[SplatSize-1], Res, Res, DAG, dl);
8868 return DAG.getNode(ISD::BITCAST, dl, Op.getValueType(), Res);
8869 }
8870
8871 // vsplti + rol self.
8872 if (SextVal == (int)(((unsigned)i << TypeShiftAmt) |
8873 ((unsigned)i >> (SplatBitSize-TypeShiftAmt)))) {
8874 SDValue Res = getCanonicalConstSplat(i, SplatSize, MVT::Other, DAG, dl);
8875 static const unsigned IIDs[] = { // Intrinsic to use for each size.
8876 Intrinsic::ppc_altivec_vrlb, Intrinsic::ppc_altivec_vrlh, 0,
8877 Intrinsic::ppc_altivec_vrlw
8878 };
8879 Res = BuildIntrinsicOp(IIDs[SplatSize-1], Res, Res, DAG, dl);
8880 return DAG.getNode(ISD::BITCAST, dl, Op.getValueType(), Res);
8881 }
8882
8883 // t = vsplti c, result = vsldoi t, t, 1
8884 if (SextVal == (int)(((unsigned)i << 8) | (i < 0 ? 0xFF : 0))) {
8885 SDValue T = getCanonicalConstSplat(i, SplatSize, MVT::v16i8, DAG, dl);
8886 unsigned Amt = Subtarget.isLittleEndian() ? 15 : 1;
8887 return BuildVSLDOI(T, T, Amt, Op.getValueType(), DAG, dl);
8888 }
8889 // t = vsplti c, result = vsldoi t, t, 2
8890 if (SextVal == (int)(((unsigned)i << 16) | (i < 0 ? 0xFFFF : 0))) {
8891 SDValue T = getCanonicalConstSplat(i, SplatSize, MVT::v16i8, DAG, dl);
8892 unsigned Amt = Subtarget.isLittleEndian() ? 14 : 2;
8893 return BuildVSLDOI(T, T, Amt, Op.getValueType(), DAG, dl);
8894 }
8895 // t = vsplti c, result = vsldoi t, t, 3
8896 if (SextVal == (int)(((unsigned)i << 24) | (i < 0 ? 0xFFFFFF : 0))) {
8897 SDValue T = getCanonicalConstSplat(i, SplatSize, MVT::v16i8, DAG, dl);
8898 unsigned Amt = Subtarget.isLittleEndian() ? 13 : 3;
8899 return BuildVSLDOI(T, T, Amt, Op.getValueType(), DAG, dl);
8900 }
8901 }
8902
8903 return SDValue();
8904}
8905
8906/// GeneratePerfectShuffle - Given an entry in the perfect-shuffle table, emit
8907/// the specified operations to build the shuffle.
8908static SDValue GeneratePerfectShuffle(unsigned PFEntry, SDValue LHS,
8909 SDValue RHS, SelectionDAG &DAG,
8910 const SDLoc &dl) {
8911 unsigned OpNum = (PFEntry >> 26) & 0x0F;
8912 unsigned LHSID = (PFEntry >> 13) & ((1 << 13)-1);
8913 unsigned RHSID = (PFEntry >> 0) & ((1 << 13)-1);
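// LHSID/RHSID each encode four source elements as a base-9 number, where
// each digit is an element index in [0,8] and 8 means undef; e.g. <0,1,2,3>
// encodes as ((0*9+1)*9+2)*9+3 == 102, the LHS identity checked below.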
8914
8915 enum {
8916 OP_COPY = 0, // Copy, used for things like <u,u,u,3> to say it is <0,1,2,3>
8917 OP_VMRGHW,
8918 OP_VMRGLW,
8919 OP_VSPLTISW0,
8920 OP_VSPLTISW1,
8921 OP_VSPLTISW2,
8922 OP_VSPLTISW3,
8923 OP_VSLDOI4,
8924 OP_VSLDOI8,
8925 OP_VSLDOI12
8926 };
8927
8928 if (OpNum == OP_COPY) {
8929 if (LHSID == (1*9+2)*9+3) return LHS;
8930 assert(LHSID == ((4*9+5)*9+6)*9+7 && "Illegal OP_COPY!");
8931 return RHS;
8932 }
8933
8934 SDValue OpLHS, OpRHS;
8935 OpLHS = GeneratePerfectShuffle(PerfectShuffleTable[LHSID], LHS, RHS, DAG, dl);
8936 OpRHS = GeneratePerfectShuffle(PerfectShuffleTable[RHSID], LHS, RHS, DAG, dl);
8937
8938 int ShufIdxs[16];
8939 switch (OpNum) {
8940 default: llvm_unreachable("Unknown i32 permute!");
8941 case OP_VMRGHW:
8942 ShufIdxs[ 0] = 0; ShufIdxs[ 1] = 1; ShufIdxs[ 2] = 2; ShufIdxs[ 3] = 3;
8943 ShufIdxs[ 4] = 16; ShufIdxs[ 5] = 17; ShufIdxs[ 6] = 18; ShufIdxs[ 7] = 19;
8944 ShufIdxs[ 8] = 4; ShufIdxs[ 9] = 5; ShufIdxs[10] = 6; ShufIdxs[11] = 7;
8945 ShufIdxs[12] = 20; ShufIdxs[13] = 21; ShufIdxs[14] = 22; ShufIdxs[15] = 23;
8946 break;
8947 case OP_VMRGLW:
8948 ShufIdxs[ 0] = 8; ShufIdxs[ 1] = 9; ShufIdxs[ 2] = 10; ShufIdxs[ 3] = 11;
8949 ShufIdxs[ 4] = 24; ShufIdxs[ 5] = 25; ShufIdxs[ 6] = 26; ShufIdxs[ 7] = 27;
8950 ShufIdxs[ 8] = 12; ShufIdxs[ 9] = 13; ShufIdxs[10] = 14; ShufIdxs[11] = 15;
8951 ShufIdxs[12] = 28; ShufIdxs[13] = 29; ShufIdxs[14] = 30; ShufIdxs[15] = 31;
8952 break;
8953 case OP_VSPLTISW0:
8954 for (unsigned i = 0; i != 16; ++i)
8955 ShufIdxs[i] = (i&3)+0;
8956 break;
8957 case OP_VSPLTISW1:
8958 for (unsigned i = 0; i != 16; ++i)
8959 ShufIdxs[i] = (i&3)+4;
8960 break;
8961 case OP_VSPLTISW2:
8962 for (unsigned i = 0; i != 16; ++i)
8963 ShufIdxs[i] = (i&3)+8;
8964 break;
8965 case OP_VSPLTISW3:
8966 for (unsigned i = 0; i != 16; ++i)
8967 ShufIdxs[i] = (i&3)+12;
8968 break;
8969 case OP_VSLDOI4:
8970 return BuildVSLDOI(OpLHS, OpRHS, 4, OpLHS.getValueType(), DAG, dl);
8971 case OP_VSLDOI8:
8972 return BuildVSLDOI(OpLHS, OpRHS, 8, OpLHS.getValueType(), DAG, dl);
8973 case OP_VSLDOI12:
8974 return BuildVSLDOI(OpLHS, OpRHS, 12, OpLHS.getValueType(), DAG, dl);
8975 }
8976 EVT VT = OpLHS.getValueType();
8977 OpLHS = DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, OpLHS);
8978 OpRHS = DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, OpRHS);
8979 SDValue T = DAG.getVectorShuffle(MVT::v16i8, dl, OpLHS, OpRHS, ShufIdxs);
8980 return DAG.getNode(ISD::BITCAST, dl, VT, T);
8981}
8982
8983/// lowerToVINSERTB - Return the SDValue if this VECTOR_SHUFFLE can be handled
8984/// by the VINSERTB instruction introduced in ISA 3.0, else just return default
8985/// SDValue.
8986SDValue PPCTargetLowering::lowerToVINSERTB(ShuffleVectorSDNode *N,
8987 SelectionDAG &DAG) const {
8988 const unsigned BytesInVector = 16;
8989 bool IsLE = Subtarget.isLittleEndian();
8990 SDLoc dl(N);
8991 SDValue V1 = N->getOperand(0);
8992 SDValue V2 = N->getOperand(1);
8993 unsigned ShiftElts = 0, InsertAtByte = 0;
8994 bool Swap = false;
8995
8996 // Shifts required to get the byte we want at element 7.
8997 unsigned LittleEndianShifts[] = {8, 7, 6, 5, 4, 3, 2, 1,
8998 0, 15, 14, 13, 12, 11, 10, 9};
8999 unsigned BigEndianShifts[] = {9, 10, 11, 12, 13, 14, 15, 0,
9000 1, 2, 3, 4, 5, 6, 7, 8};
9001
9002 ArrayRef<int> Mask = N->getMask();
9003 int OriginalOrder[] = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15};
9004
9005 // For each mask element, find out if we're just inserting something
9006 // from V2 into V1 or vice versa.
9007 // Possible permutations inserting an element from V2 into V1:
9008 // X, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15
9009 // 0, X, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15
9010 // ...
9011 // 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, X
9012 // Inserting from V1 into V2 will be similar, except mask range will be
9013 // [16,31].
9014
9015 bool FoundCandidate = false;
9016 // If both vector operands for the shuffle are the same vector, the mask
9017 // will contain only elements from the first one and the second one will be
9018 // undef.
9019 unsigned VINSERTBSrcElem = IsLE ? 8 : 7;
9020 // Go through the mask of bytes to find an element that's being moved
9021 // from one vector to the other.
9022 for (unsigned i = 0; i < BytesInVector; ++i) {
9023 unsigned CurrentElement = Mask[i];
9024 // If the 2nd operand is undefined, we should only look for the source
9025 // element (7 for big endian, 8 for little endian) in the Mask.
9026 if (V2.isUndef() && CurrentElement != VINSERTBSrcElem)
9027 continue;
9028
9029 bool OtherElementsInOrder = true;
9030 // Examine the other elements in the Mask to see if they're in original
9031 // order.
9032 for (unsigned j = 0; j < BytesInVector; ++j) {
9033 if (j == i)
9034 continue;
9035 // If CurrentElement is from V1 [0,15], then we expect the rest of the Mask
9036 // to be from V2 [16,31] and vice versa. Unless the 2nd operand is undefined,
9037 // in which case we assume we're always picking from the 1st operand.
9038 int MaskOffset =
9039 (!V2.isUndef() && CurrentElement < BytesInVector) ? BytesInVector : 0;
9040 if (Mask[j] != OriginalOrder[j] + MaskOffset) {
9041 OtherElementsInOrder = false;
9042 break;
9043 }
9044 }
9045 // If other elements are in original order, we record the number of shifts
9046 // we need to get the element we want into element 7. Also record which byte
9047 // in the vector we should insert into.
9048 if (OtherElementsInOrder) {
9049 // If 2nd operand is undefined, we assume no shifts and no swapping.
9050 if (V2.isUndef()) {
9051 ShiftElts = 0;
9052 Swap = false;
9053 } else {
9054 // Only need the low 4 bits for the shift; operands are swapped if CurrentElement >= 2^4.
9055 ShiftElts = IsLE ? LittleEndianShifts[CurrentElement & 0xF]
9056 : BigEndianShifts[CurrentElement & 0xF];
9057 Swap = CurrentElement < BytesInVector;
9058 }
9059 InsertAtByte = IsLE ? BytesInVector - (i + 1) : i;
9060 FoundCandidate = true;
9061 break;
9062 }
9063 }
9064
9065 if (!FoundCandidate)
9066 return SDValue();
9067
9068 // Candidate found, construct the proper SDAG sequence with VINSERTB,
9069 // optionally with VECSHL if shift is required.
9070 if (Swap)
9071 std::swap(V1, V2);
9072 if (V2.isUndef())
9073 V2 = V1;
9074 if (ShiftElts) {
9075 SDValue Shl = DAG.getNode(PPCISD::VECSHL, dl, MVT::v16i8, V2, V2,
9076 DAG.getConstant(ShiftElts, dl, MVT::i32));
9077 return DAG.getNode(PPCISD::VECINSERT, dl, MVT::v16i8, V1, Shl,
9078 DAG.getConstant(InsertAtByte, dl, MVT::i32));
9079 }
9080 return DAG.getNode(PPCISD::VECINSERT, dl, MVT::v16i8, V1, V2,
9081 DAG.getConstant(InsertAtByte, dl, MVT::i32));
9082}
9083
9084/// lowerToVINSERTH - Return the SDValue if this VECTOR_SHUFFLE can be handled
9085/// by the VINSERTH instruction introduced in ISA 3.0, else just return default
9086/// SDValue.
9087SDValue PPCTargetLowering::lowerToVINSERTH(ShuffleVectorSDNode *N,
9088 SelectionDAG &DAG) const {
9089 const unsigned NumHalfWords = 8;
9090 const unsigned BytesInVector = NumHalfWords * 2;
9091 // Check that the shuffle is on half-words.
9092 if (!isNByteElemShuffleMask(N, 2, 1))
9093 return SDValue();
9094
9095 bool IsLE = Subtarget.isLittleEndian();
9096 SDLoc dl(N);
9097 SDValue V1 = N->getOperand(0);
9098 SDValue V2 = N->getOperand(1);
9099 unsigned ShiftElts = 0, InsertAtByte = 0;
9100 bool Swap = false;
9101
9102 // Shifts required to get the half-word we want at element 3.
9103 unsigned LittleEndianShifts[] = {4, 3, 2, 1, 0, 7, 6, 5};
9104 unsigned BigEndianShifts[] = {5, 6, 7, 0, 1, 2, 3, 4};
9105
9106 uint32_t Mask = 0;
9107 uint32_t OriginalOrderLow = 0x1234567;
9108 uint32_t OriginalOrderHigh = 0x89ABCDEF;
9109 // Now we look at mask elements 0,2,4,6,8,10,12,14. Pack the mask into a
9110 // 32-bit space, only need 4-bit nibbles per element.
9111 for (unsigned i = 0; i < NumHalfWords; ++i) {
9112 unsigned MaskShift = (NumHalfWords - 1 - i) * 4;
9113 Mask |= ((uint32_t)(N->getMaskElt(i * 2) / 2) << MaskShift);
9114 }
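// E.g. the identity half-word mask <0,1,2,3,4,5,6,7> packs to 0x01234567,
// which is exactly OriginalOrderLow.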
9115
9116 // For each mask element, find out if we're just inserting something
9117 // from V2 into V1 or vice versa. Possible permutations inserting an element
9118 // from V2 into V1:
9119 // X, 1, 2, 3, 4, 5, 6, 7
9120 // 0, X, 2, 3, 4, 5, 6, 7
9121 // 0, 1, X, 3, 4, 5, 6, 7
9122 // 0, 1, 2, X, 4, 5, 6, 7
9123 // 0, 1, 2, 3, X, 5, 6, 7
9124 // 0, 1, 2, 3, 4, X, 6, 7
9125 // 0, 1, 2, 3, 4, 5, X, 7
9126 // 0, 1, 2, 3, 4, 5, 6, X
9127 // Inserting from V1 into V2 will be similar, except mask range will be [8,15].
9128
9129 bool FoundCandidate = false;
9130 // Go through the mask of half-words to find an element that's being moved
9131 // from one vector to the other.
9132 for (unsigned i = 0; i < NumHalfWords; ++i) {
9133 unsigned MaskShift = (NumHalfWords - 1 - i) * 4;
9134 uint32_t MaskOneElt = (Mask >> MaskShift) & 0xF;
9135 uint32_t MaskOtherElts = ~(0xF << MaskShift);
9136 uint32_t TargetOrder = 0x0;
9137
9138 // If both vector operands for the shuffle are the same vector, the mask
9139 // will contain only elements from the first one and the second one will be
9140 // undef.
9141 if (V2.isUndef()) {
9142 ShiftElts = 0;
9143 unsigned VINSERTHSrcElem = IsLE ? 4 : 3;
9144 TargetOrder = OriginalOrderLow;
9145 Swap = false;
9146 // Skip if not the correct element or mask of other elements don't equal
9147 // to our expected order.
9148 if (MaskOneElt == VINSERTHSrcElem &&
9149 (Mask & MaskOtherElts) == (TargetOrder & MaskOtherElts)) {
9150 InsertAtByte = IsLE ? BytesInVector - (i + 1) * 2 : i * 2;
9151 FoundCandidate = true;
9152 break;
9153 }
9154 } else { // If both operands are defined.
9155 // Target order is [8,15] if the current mask is between [0,7].
9156 TargetOrder =
9157 (MaskOneElt < NumHalfWords) ? OriginalOrderHigh : OriginalOrderLow;
9158 // Skip if mask of other elements don't equal our expected order.
9159 if ((Mask & MaskOtherElts) == (TargetOrder & MaskOtherElts)) {
9160 // We only need the last 3 bits for the number of shifts.
9161 ShiftElts = IsLE ? LittleEndianShifts[MaskOneElt & 0x7]
9162 : BigEndianShifts[MaskOneElt & 0x7];
9163 InsertAtByte = IsLE ? BytesInVector - (i + 1) * 2 : i * 2;
9164 Swap = MaskOneElt < NumHalfWords;
9165 FoundCandidate = true;
9166 break;
9167 }
9168 }
9169 }
9170
9171 if (!FoundCandidate)
9172 return SDValue();
9173
9174 // Candidate found, construct the proper SDAG sequence with VINSERTH,
9175 // optionally with VECSHL if shift is required.
9176 if (Swap)
9177 std::swap(V1, V2);
9178 if (V2.isUndef())
9179 V2 = V1;
9180 SDValue Conv1 = DAG.getNode(ISD::BITCAST, dl, MVT::v8i16, V1);
9181 if (ShiftElts) {
9182 // Double ShiftElts because we're left shifting on v16i8 type.
9183 SDValue Shl = DAG.getNode(PPCISD::VECSHL, dl, MVT::v16i8, V2, V2,
9184 DAG.getConstant(2 * ShiftElts, dl, MVT::i32));
9185 SDValue Conv2 = DAG.getNode(ISD::BITCAST, dl, MVT::v8i16, Shl);
9186 SDValue Ins = DAG.getNode(PPCISD::VECINSERT, dl, MVT::v8i16, Conv1, Conv2,
9187 DAG.getConstant(InsertAtByte, dl, MVT::i32));
9188 return DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, Ins);
9189 }
9190 SDValue Conv2 = DAG.getNode(ISD::BITCAST, dl, MVT::v8i16, V2);
9191 SDValue Ins = DAG.getNode(PPCISD::VECINSERT, dl, MVT::v8i16, Conv1, Conv2,
9192 DAG.getConstant(InsertAtByte, dl, MVT::i32));
9193 return DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, Ins);
9194}
9195
9196/// lowerToXXSPLTI32DX - Return the SDValue if this VECTOR_SHUFFLE can be
9197/// handled by the XXSPLTI32DX instruction introduced in ISA 3.1, otherwise
9198/// return the default SDValue.
9199SDValue PPCTargetLowering::lowerToXXSPLTI32DX(ShuffleVectorSDNode *SVN,
9200 SelectionDAG &DAG) const {
9201 // The LHS and RHS may be bitcasts to v16i8 as we canonicalize shuffles
9202 // to v16i8. Peek through the bitcasts to get the actual operands.
9203 SDValue LHS = peekThroughBitcasts(SVN->getOperand(0));
9204 SDValue RHS = peekThroughBitcasts(SVN->getOperand(1));
9205
9206 auto ShuffleMask = SVN->getMask();
9207 SDValue VecShuffle(SVN, 0);
9208 SDLoc DL(SVN);
9209
9210 // Check that we have a four byte shuffle.
9211 if (!isNByteElemShuffleMask(SVN, 4, 1))
9212 return SDValue();
9213
9214 // Canonicalize the RHS being a BUILD_VECTOR when lowering to xxsplti32dx.
9215 if (RHS->getOpcode() != ISD::BUILD_VECTOR) {
9216 std::swap(LHS, RHS);
9217 VecShuffle = DAG.getCommutedVectorShuffle(*SVN);
9218 ShuffleMask = cast<ShuffleVectorSDNode>(VecShuffle)->getMask();
9219 }
9220
9221 // Ensure that the RHS is a vector of constants.
9222 BuildVectorSDNode *BVN = dyn_cast<BuildVectorSDNode>(RHS.getNode());
9223 if (!BVN)
9224 return SDValue();
9225
9226 // Check if RHS is a splat of 4-bytes (or smaller).
9227 APInt APSplatValue, APSplatUndef;
9228 unsigned SplatBitSize;
9229 bool HasAnyUndefs;
9230 if (!BVN->isConstantSplat(APSplatValue, APSplatUndef, SplatBitSize,
9231 HasAnyUndefs, 0, !Subtarget.isLittleEndian()) ||
9232 SplatBitSize > 32)
9233 return SDValue();
9234
9235 // Check that the shuffle mask matches the semantics of XXSPLTI32DX.
9236 // The instruction splats a constant C into two words of the source vector
9237 // producing { C, Unchanged, C, Unchanged } or { Unchanged, C, Unchanged, C }.
9238 // Thus we check that the shuffle mask is the equivalent of
9239 // <0, [4-7], 2, [4-7]> or <[4-7], 1, [4-7], 3> respectively.
9240 // Note: the check above of isNByteElemShuffleMask() ensures that the bytes
9241 // within each word are consecutive, so we only need to check the first byte.
9242 SDValue Index;
9243 bool IsLE = Subtarget.isLittleEndian();
9244 if ((ShuffleMask[0] == 0 && ShuffleMask[8] == 8) &&
9245 (ShuffleMask[4] % 4 == 0 && ShuffleMask[12] % 4 == 0 &&
9246 ShuffleMask[4] > 15 && ShuffleMask[12] > 15))
9247 Index = DAG.getTargetConstant(IsLE ? 0 : 1, DL, MVT::i32);
9248 else if ((ShuffleMask[4] == 4 && ShuffleMask[12] == 12) &&
9249 (ShuffleMask[0] % 4 == 0 && ShuffleMask[8] % 4 == 0 &&
9250 ShuffleMask[0] > 15 && ShuffleMask[8] > 15))
9251 Index = DAG.getTargetConstant(IsLE ? 1 : 0, DL, MVT::i32);
9252 else
9253 return SDValue();
9254
9255 // If the splat is narrower than 32-bits, we need to get the 32-bit value
9256 // for XXSPLTI32DX.
9257 unsigned SplatVal = APSplatValue.getZExtValue();
9258 for (; SplatBitSize < 32; SplatBitSize <<= 1)
9259 SplatVal |= (SplatVal << SplatBitSize);
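// E.g. an 8-bit splat of 0xAB widens to 0xABAB and then to 0xABABABAB.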
9260
9261 SDValue SplatNode = DAG.getNode(
9262 PPCISD::XXSPLTI32DX, DL, MVT::v2i64, DAG.getBitcast(MVT::v2i64, LHS),
9263 Index, DAG.getTargetConstant(SplatVal, DL, MVT::i32));
9264 return DAG.getNode(ISD::BITCAST, DL, MVT::v16i8, SplatNode);
9265}
9266
9267/// LowerROTL - Custom lowering for ROTL(v1i128) to vector_shuffle(v16i8).
9268/// We lower ROTL(v1i128) to vector_shuffle(v16i8) only if shift amount is
9269/// a multiple of 8. Otherwise convert it to a scalar i128 rotation,
9270/// i.e. (or (shl x, C1), (srl x, 128-C1)).
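/// E.g. a rotate left by 24 bits (SHLAmt == 24) becomes the byte shuffle
/// <3,4,5,6,7,8,9,10,11,12,13,14,15,0,1,2>.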
9271SDValue PPCTargetLowering::LowerROTL(SDValue Op, SelectionDAG &DAG) const {
9272 assert(Op.getOpcode() == ISD::ROTL && "Should only be called for ISD::ROTL");
9273 assert(Op.getValueType() == MVT::v1i128 &&
9274 "Only set v1i128 as custom, other type shouldn't reach here!");
9275 SDLoc dl(Op);
9276 SDValue N0 = peekThroughBitcasts(Op.getOperand(0));
9277 SDValue N1 = peekThroughBitcasts(Op.getOperand(1));
9278 unsigned SHLAmt = N1.getConstantOperandVal(0);
9279 if (SHLAmt % 8 == 0) {
9280 std::array<int, 16> Mask;
9281 std::iota(Mask.begin(), Mask.end(), 0);
9282 std::rotate(Mask.begin(), Mask.begin() + SHLAmt / 8, Mask.end());
9283 if (SDValue Shuffle =
9284 DAG.getVectorShuffle(MVT::v16i8, dl, DAG.getBitcast(MVT::v16i8, N0),
9285 DAG.getUNDEF(MVT::v16i8), Mask))
9286 return DAG.getNode(ISD::BITCAST, dl, MVT::v1i128, Shuffle);
9287 }
9288 SDValue ArgVal = DAG.getBitcast(MVT::i128, N0);
9289 SDValue SHLOp = DAG.getNode(ISD::SHL, dl, MVT::i128, ArgVal,
9290 DAG.getConstant(SHLAmt, dl, MVT::i32));
9291 SDValue SRLOp = DAG.getNode(ISD::SRL, dl, MVT::i128, ArgVal,
9292 DAG.getConstant(128 - SHLAmt, dl, MVT::i32));
9293 SDValue OROp = DAG.getNode(ISD::OR, dl, MVT::i128, SHLOp, SRLOp);
9294 return DAG.getNode(ISD::BITCAST, dl, MVT::v1i128, OROp);
9295}
9296
9297/// LowerVECTOR_SHUFFLE - Return the code we lower for VECTOR_SHUFFLE. If this
9298/// is a shuffle we can handle in a single instruction, return it. Otherwise,
9299/// return the code it can be lowered into. Worst case, it can always be
9300/// lowered into a vperm.
9301SDValue PPCTargetLowering::LowerVECTOR_SHUFFLE(SDValue Op,
9302 SelectionDAG &DAG) const {
9303 SDLoc dl(Op);
9304 SDValue V1 = Op.getOperand(0);
9305 SDValue V2 = Op.getOperand(1);
9306 ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(Op);
9307
9308 // Any nodes that were combined in the target-independent combiner prior
9309 // to vector legalization will not be sent to the target combine. Try to
9310 // combine it here.
9311 if (SDValue NewShuffle = combineVectorShuffle(SVOp, DAG)) {
9312 if (!isa<ShuffleVectorSDNode>(NewShuffle))
9313 return NewShuffle;
9314 Op = NewShuffle;
9315 SVOp = cast<ShuffleVectorSDNode>(Op);
9316 V1 = Op.getOperand(0);
9317 V2 = Op.getOperand(1);
9318 }
9319 EVT VT = Op.getValueType();
9320 bool isLittleEndian = Subtarget.isLittleEndian();
9321
9322 unsigned ShiftElts, InsertAtByte;
9323 bool Swap = false;
9324
9325 // If this is a load-and-splat, we can do that with a single instruction
9326 // in some cases. However if the load has multiple uses, we don't want to
9327 // combine it because that will just produce multiple loads.
9328 bool IsPermutedLoad = false;
9329 const SDValue *InputLoad = getNormalLoadInput(V1, IsPermutedLoad);
9330 if (InputLoad && Subtarget.hasVSX() && V2.isUndef() &&
9331 (PPC::isSplatShuffleMask(SVOp, 4) || PPC::isSplatShuffleMask(SVOp, 8)) &&
9332 InputLoad->hasOneUse()) {
9333 bool IsFourByte = PPC::isSplatShuffleMask(SVOp, 4);
9334 int SplatIdx =
9335 PPC::getSplatIdxForPPCMnemonics(SVOp, IsFourByte ? 4 : 8, DAG);
9336
9337 // The splat index for permuted loads will be in the left half of the vector
9338 // which is strictly wider than the loaded value by 8 bytes. So we need to
9339 // adjust the splat index to point to the correct address in memory.
9340 if (IsPermutedLoad) {
9341 assert(isLittleEndian && "Unexpected permuted load on big endian target");
9342 SplatIdx += IsFourByte ? 2 : 1;
9343 assert((SplatIdx < (IsFourByte ? 4 : 2)) &&
9344 "Splat of a value outside of the loaded memory");
9345 }
9346
9347 LoadSDNode *LD = cast<LoadSDNode>(*InputLoad);
9348 // For 4-byte load-and-splat, we need Power9.
9349 if ((IsFourByte && Subtarget.hasP9Vector()) || !IsFourByte) {
9350 uint64_t Offset = 0;
9351 if (IsFourByte)
9352 Offset = isLittleEndian ? (3 - SplatIdx) * 4 : SplatIdx * 4;
9353 else
9354 Offset = isLittleEndian ? (1 - SplatIdx) * 8 : SplatIdx * 8;
9355
9356 SDValue BasePtr = LD->getBasePtr();
9357 if (Offset != 0)
9358 BasePtr = DAG.getNode(ISD::ADD, dl, getPointerTy(DAG.getDataLayout()),
9359 BasePtr, DAG.getIntPtrConstant(Offset, dl));
9360 SDValue Ops[] = {
9361 LD->getChain(), // Chain
9362 BasePtr, // BasePtr
9363 DAG.getValueType(Op.getValueType()) // VT
9364 };
9365 SDVTList VTL =
9366 DAG.getVTList(IsFourByte ? MVT::v4i32 : MVT::v2i64, MVT::Other);
9367 SDValue LdSplt =
9368 DAG.getMemIntrinsicNode(PPCISD::LD_SPLAT, dl, VTL,
9369 Ops, LD->getMemoryVT(), LD->getMemOperand());
9370 DAG.ReplaceAllUsesOfValueWith(InputLoad->getValue(1), LdSplt.getValue(1));
9371 if (LdSplt.getValueType() != SVOp->getValueType(0))
9372 LdSplt = DAG.getBitcast(SVOp->getValueType(0), LdSplt);
9373 return LdSplt;
9374 }
9375 }
9376 if (Subtarget.hasP9Vector() &&
9377 PPC::isXXINSERTWMask(SVOp, ShiftElts, InsertAtByte, Swap,
9378 isLittleEndian)) {
9379 if (Swap)
9380 std::swap(V1, V2);
9381 SDValue Conv1 = DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, V1);
9382 SDValue Conv2 = DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, V2);
9383 if (ShiftElts) {
9384 SDValue Shl = DAG.getNode(PPCISD::VECSHL, dl, MVT::v4i32, Conv2, Conv2,
9385 DAG.getConstant(ShiftElts, dl, MVT::i32));
9386 SDValue Ins = DAG.getNode(PPCISD::VECINSERT, dl, MVT::v4i32, Conv1, Shl,
9387 DAG.getConstant(InsertAtByte, dl, MVT::i32));
9388 return DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, Ins);
9389 }
9390 SDValue Ins = DAG.getNode(PPCISD::VECINSERT, dl, MVT::v4i32, Conv1, Conv2,
9391 DAG.getConstant(InsertAtByte, dl, MVT::i32));
9392 return DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, Ins);
9393 }
9394
9395 if (Subtarget.hasPrefixInstrs()) {
9396 SDValue SplatInsertNode;
9397 if ((SplatInsertNode = lowerToXXSPLTI32DX(SVOp, DAG)))
9398 return SplatInsertNode;
9399 }
9400
9401 if (Subtarget.hasP9Altivec()) {
9402 SDValue NewISDNode;
9403 if ((NewISDNode = lowerToVINSERTH(SVOp, DAG)))
9404 return NewISDNode;
9405
9406 if ((NewISDNode = lowerToVINSERTB(SVOp, DAG)))
9407 return NewISDNode;
9408 }
9409
9410 if (Subtarget.hasVSX() &&
9411 PPC::isXXSLDWIShuffleMask(SVOp, ShiftElts, Swap, isLittleEndian)) {
9412 if (Swap)
9413 std::swap(V1, V2);
9414 SDValue Conv1 = DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, V1);
9415 SDValue Conv2 =
9416 DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, V2.isUndef() ? V1 : V2);
9417
9418 SDValue Shl = DAG.getNode(PPCISD::VECSHL, dl, MVT::v4i32, Conv1, Conv2,
9419 DAG.getConstant(ShiftElts, dl, MVT::i32));
9420 return DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, Shl);
9421 }
9422
9423 if (Subtarget.hasVSX() &&
9424 PPC::isXXPERMDIShuffleMask(SVOp, ShiftElts, Swap, isLittleEndian)) {
9425 if (Swap)
9426 std::swap(V1, V2);
9427 SDValue Conv1 = DAG.getNode(ISD::BITCAST, dl, MVT::v2i64, V1);
9428 SDValue Conv2 =
9429 DAG.getNode(ISD::BITCAST, dl, MVT::v2i64, V2.isUndef() ? V1 : V2);
9430
9431 SDValue PermDI = DAG.getNode(PPCISD::XXPERMDI, dl, MVT::v2i64, Conv1, Conv2,
9432 DAG.getConstant(ShiftElts, dl, MVT::i32));
9433 return DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, PermDI);
9434 }
9435
9436 if (Subtarget.hasP9Vector()) {
9437 if (PPC::isXXBRHShuffleMask(SVOp)) {
9438 SDValue Conv = DAG.getNode(ISD::BITCAST, dl, MVT::v8i16, V1);
9439 SDValue ReveHWord = DAG.getNode(ISD::BSWAP, dl, MVT::v8i16, Conv);
9440 return DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, ReveHWord);
9441 } else if (PPC::isXXBRWShuffleMask(SVOp)) {
9442 SDValue Conv = DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, V1);
9443 SDValue ReveWord = DAG.getNode(ISD::BSWAP, dl, MVT::v4i32, Conv);
9444 return DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, ReveWord);
9445 } else if (PPC::isXXBRDShuffleMask(SVOp)) {
9446 SDValue Conv = DAG.getNode(ISD::BITCAST, dl, MVT::v2i64, V1);
9447 SDValue ReveDWord = DAG.getNode(ISD::BSWAP, dl, MVT::v2i64, Conv);
9448 return DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, ReveDWord);
9449 } else if (PPC::isXXBRQShuffleMask(SVOp)) {
9450 SDValue Conv = DAG.getNode(ISD::BITCAST, dl, MVT::v1i128, V1);
9451 SDValue ReveQWord = DAG.getNode(ISD::BSWAP, dl, MVT::v1i128, Conv);
9452 return DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, ReveQWord);
9453 }
9454 }
9455
9456 if (Subtarget.hasVSX()) {
9457 if (V2.isUndef() && PPC::isSplatShuffleMask(SVOp, 4)) {
9458 int SplatIdx = PPC::getSplatIdxForPPCMnemonics(SVOp, 4, DAG);
9459
9460 SDValue Conv = DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, V1);
9461 SDValue Splat = DAG.getNode(PPCISD::XXSPLT, dl, MVT::v4i32, Conv,
9462 DAG.getConstant(SplatIdx, dl, MVT::i32));
9463 return DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, Splat);
9464 }
9465
9466 // Left shifts of 8 bytes are actually swaps. Convert accordingly.
9467 if (V2.isUndef() && PPC::isVSLDOIShuffleMask(SVOp, 1, DAG) == 8) {
9468 SDValue Conv = DAG.getNode(ISD::BITCAST, dl, MVT::v2f64, V1);
9469 SDValue Swap = DAG.getNode(PPCISD::SWAP_NO_CHAIN, dl, MVT::v2f64, Conv);
9470 return DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, Swap);
9471 }
9472 }
9473
9474 // Cases that are handled by instructions that take permute immediates
9475 // (such as vsplt*) should be left as VECTOR_SHUFFLE nodes so they can be
9476 // selected by the instruction selector.
9477 if (V2.isUndef()) {
9478 if (PPC::isSplatShuffleMask(SVOp, 1) ||
9479 PPC::isSplatShuffleMask(SVOp, 2) ||
9480 PPC::isSplatShuffleMask(SVOp, 4) ||
9481 PPC::isVPKUWUMShuffleMask(SVOp, 1, DAG) ||
9482 PPC::isVPKUHUMShuffleMask(SVOp, 1, DAG) ||
9483 PPC::isVSLDOIShuffleMask(SVOp, 1, DAG) != -1 ||
9484 PPC::isVMRGLShuffleMask(SVOp, 1, 1, DAG) ||
9485 PPC::isVMRGLShuffleMask(SVOp, 2, 1, DAG) ||
9486 PPC::isVMRGLShuffleMask(SVOp, 4, 1, DAG) ||
9487 PPC::isVMRGHShuffleMask(SVOp, 1, 1, DAG) ||
9488 PPC::isVMRGHShuffleMask(SVOp, 2, 1, DAG) ||
9489 PPC::isVMRGHShuffleMask(SVOp, 4, 1, DAG) ||
9490 (Subtarget.hasP8Altivec() && (
9491 PPC::isVPKUDUMShuffleMask(SVOp, 1, DAG) ||
9492 PPC::isVMRGEOShuffleMask(SVOp, true, 1, DAG) ||
9493 PPC::isVMRGEOShuffleMask(SVOp, false, 1, DAG)))) {
9494 return Op;
9495 }
9496 }
9497
9498 // Altivec has a variety of "shuffle immediates" that take two vector inputs
9499 // and produce a fixed permutation. If any of these match, do not lower to
9500 // VPERM.
9501 unsigned int ShuffleKind = isLittleEndian ? 2 : 0;
9502 if (PPC::isVPKUWUMShuffleMask(SVOp, ShuffleKind, DAG) ||
9503 PPC::isVPKUHUMShuffleMask(SVOp, ShuffleKind, DAG) ||
9504 PPC::isVSLDOIShuffleMask(SVOp, ShuffleKind, DAG) != -1 ||
9505 PPC::isVMRGLShuffleMask(SVOp, 1, ShuffleKind, DAG) ||
9506 PPC::isVMRGLShuffleMask(SVOp, 2, ShuffleKind, DAG) ||
9507 PPC::isVMRGLShuffleMask(SVOp, 4, ShuffleKind, DAG) ||
9508 PPC::isVMRGHShuffleMask(SVOp, 1, ShuffleKind, DAG) ||
9509 PPC::isVMRGHShuffleMask(SVOp, 2, ShuffleKind, DAG) ||
9510 PPC::isVMRGHShuffleMask(SVOp, 4, ShuffleKind, DAG) ||
9511 (Subtarget.hasP8Altivec() && (
9512 PPC::isVPKUDUMShuffleMask(SVOp, ShuffleKind, DAG) ||
9513 PPC::isVMRGEOShuffleMask(SVOp, true, ShuffleKind, DAG) ||
9514 PPC::isVMRGEOShuffleMask(SVOp, false, ShuffleKind, DAG))))
9515 return Op;
9516
9517 // Check to see if this is a shuffle of 4-byte values. If so, we can use our
9518 // perfect shuffle table to emit an optimal matching sequence.
9519 ArrayRef<int> PermMask = SVOp->getMask();
9520
9521 unsigned PFIndexes[4];
9522 bool isFourElementShuffle = true;
9523 for (unsigned i = 0; i != 4 && isFourElementShuffle; ++i) { // Element number
9524 unsigned EltNo = 8; // Start out undef.
9525 for (unsigned j = 0; j != 4; ++j) { // Intra-element byte.
9526 if (PermMask[i*4+j] < 0)
9527 continue; // Undef, ignore it.
9528
9529 unsigned ByteSource = PermMask[i*4+j];
9530 if ((ByteSource & 3) != j) {
9531 isFourElementShuffle = false;
9532 break;
9533 }
9534
9535 if (EltNo == 8) {
9536 EltNo = ByteSource/4;
9537 } else if (EltNo != ByteSource/4) {
9538 isFourElementShuffle = false;
9539 break;
9540 }
9541 }
9542 PFIndexes[i] = EltNo;
9543 }
9544
9545 // If this shuffle can be expressed as a shuffle of 4-byte elements, use the
9546 // perfect shuffle table to determine if it is cost effective to do this as
9547 // discrete instructions, or whether we should use a vperm.
9548 // For now, we skip this for little endian until such time as we have a
9549 // little-endian perfect shuffle table.
9550 if (isFourElementShuffle && !isLittleEndian) {
9551 // Compute the index in the perfect shuffle table.
9552 unsigned PFTableIndex =
9553 PFIndexes[0]*9*9*9+PFIndexes[1]*9*9+PFIndexes[2]*9+PFIndexes[3];
9554
9555 unsigned PFEntry = PerfectShuffleTable[PFTableIndex];
9556 unsigned Cost = (PFEntry >> 30);
9557
9558 // Determining when to avoid vperm is tricky. Many things affect the cost
9559 // of vperm, particularly how many times the perm mask needs to be computed.
9560 // For example, if the perm mask can be hoisted out of a loop or is already
9561 // used (perhaps because there are multiple permutes with the same shuffle
9562 // mask?) the vperm has a cost of 1. OTOH, hoisting the permute mask out of
9563 // the loop requires an extra register.
9564 //
9565 // As a compromise, we only emit discrete instructions if the shuffle can be
9566 // generated in 3 or fewer operations. When we have loop information
9567 // available, if this block is within a loop, we should avoid using vperm
9568 // for 3-operation perms and use a constant pool load instead.
9569 if (Cost < 3)
9570 return GeneratePerfectShuffle(PFEntry, V1, V2, DAG, dl);
9571 }
9572
9573 // Lower this to a VPERM(V1, V2, V3) expression, where V3 is a constant
9574 // vector that will get spilled to the constant pool.
9575 if (V2.isUndef()) V2 = V1;
9576
9577 // The SHUFFLE_VECTOR mask is almost exactly what we want for vperm, except
9578 // that it is in input element units, not in bytes. Convert now.
9579
9580 // For little endian, the order of the input vectors is reversed, and
9581 // the permutation mask is complemented with respect to 31. This is
9582 // necessary to produce proper semantics with the big-endian-biased vperm
9583 // instruction.
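// E.g. on little endian, byte j of input element SrcElt contributes the
// control byte 31 - (SrcElt * BytesPerElement + j), so selecting source
// byte 0 produces control byte 31.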
9584 EVT EltVT = V1.getValueType().getVectorElementType();
9585 unsigned BytesPerElement = EltVT.getSizeInBits()/8;
9586
9587 SmallVector<SDValue, 16> ResultMask;
9588 for (unsigned i = 0, e = VT.getVectorNumElements(); i != e; ++i) {
9589 unsigned SrcElt = PermMask[i] < 0 ? 0 : PermMask[i];
9590
9591 for (unsigned j = 0; j != BytesPerElement; ++j)
9592 if (isLittleEndian)
9593 ResultMask.push_back(DAG.getConstant(31 - (SrcElt*BytesPerElement + j),
9594 dl, MVT::i32));
9595 else
9596 ResultMask.push_back(DAG.getConstant(SrcElt*BytesPerElement + j, dl,
9597 MVT::i32));
9598 }
9599
9600 ShufflesHandledWithVPERM++;
9601 SDValue VPermMask = DAG.getBuildVector(MVT::v16i8, dl, ResultMask);
9602 LLVM_DEBUG(dbgs() << "Emitting a VPERM for the following shuffle:\n");
9603 LLVM_DEBUG(SVOp->dump());
9604 LLVM_DEBUG(dbgs() << "With the following permute control vector:\n");
9605 LLVM_DEBUG(VPermMask.dump());
9606
9607 if (isLittleEndian)
9608 return DAG.getNode(PPCISD::VPERM, dl, V1.getValueType(),
9609 V2, V1, VPermMask);
9610 else
9611 return DAG.getNode(PPCISD::VPERM, dl, V1.getValueType(),
9612 V1, V2, VPermMask);
9613}
9614
9615/// getVectorCompareInfo - Given an intrinsic, return false if it is not a
9616/// vector comparison. If it is, return true and fill in Opc/isDot with
9617/// information about the intrinsic.
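/// The CompareOpc values below are the extended opcode numbers of the
/// corresponding AltiVec/VSX compare instructions (e.g. 198 for vcmpeqfp),
/// and isDot marks the record-form predicate ('_p') variants.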
9618static bool getVectorCompareInfo(SDValue Intrin, int &CompareOpc,
9619 bool &isDot, const PPCSubtarget &Subtarget) {
9620 unsigned IntrinsicID =
9621 cast<ConstantSDNode>(Intrin.getOperand(0))->getZExtValue();
9622 CompareOpc = -1;
9623 isDot = false;
9624 switch (IntrinsicID) {
9625 default:
9626 return false;
9627 // Comparison predicates.
9628 case Intrinsic::ppc_altivec_vcmpbfp_p:
9629 CompareOpc = 966;
9630 isDot = true;
9631 break;
9632 case Intrinsic::ppc_altivec_vcmpeqfp_p:
9633 CompareOpc = 198;
9634 isDot = true;
9635 break;
9636 case Intrinsic::ppc_altivec_vcmpequb_p:
9637 CompareOpc = 6;
9638 isDot = true;
9639 break;
9640 case Intrinsic::ppc_altivec_vcmpequh_p:
9641 CompareOpc = 70;
9642 isDot = true;
9643 break;
9644 case Intrinsic::ppc_altivec_vcmpequw_p:
9645 CompareOpc = 134;
9646 isDot = true;
9647 break;
9648 case Intrinsic::ppc_altivec_vcmpequd_p:
9649 if (Subtarget.hasP8Altivec()) {
9650 CompareOpc = 199;
9651 isDot = true;
9652 } else
9653 return false;
9654 break;
9655 case Intrinsic::ppc_altivec_vcmpneb_p:
9656 case Intrinsic::ppc_altivec_vcmpneh_p:
9657 case Intrinsic::ppc_altivec_vcmpnew_p:
9658 case Intrinsic::ppc_altivec_vcmpnezb_p:
9659 case Intrinsic::ppc_altivec_vcmpnezh_p:
9660 case Intrinsic::ppc_altivec_vcmpnezw_p:
9661 if (Subtarget.hasP9Altivec()) {
9662 switch (IntrinsicID) {
9663 default:
9664 llvm_unreachable("Unknown comparison intrinsic.");
9665 case Intrinsic::ppc_altivec_vcmpneb_p:
9666 CompareOpc = 7;
9667 break;
9668 case Intrinsic::ppc_altivec_vcmpneh_p:
9669 CompareOpc = 71;
9670 break;
9671 case Intrinsic::ppc_altivec_vcmpnew_p:
9672 CompareOpc = 135;
9673 break;
9674 case Intrinsic::ppc_altivec_vcmpnezb_p:
9675 CompareOpc = 263;
9676 break;
9677 case Intrinsic::ppc_altivec_vcmpnezh_p:
9678 CompareOpc = 327;
9679 break;
9680 case Intrinsic::ppc_altivec_vcmpnezw_p:
9681 CompareOpc = 391;
9682 break;
9683 }
9684 isDot = true;
9685 } else
9686 return false;
9687 break;
9688 case Intrinsic::ppc_altivec_vcmpgefp_p:
9689 CompareOpc = 454;
9690 isDot = true;
9691 break;
9692 case Intrinsic::ppc_altivec_vcmpgtfp_p:
9693 CompareOpc = 710;
9694 isDot = true;
9695 break;
9696 case Intrinsic::ppc_altivec_vcmpgtsb_p:
9697 CompareOpc = 774;
9698 isDot = true;
9699 break;
9700 case Intrinsic::ppc_altivec_vcmpgtsh_p:
9701 CompareOpc = 838;
9702 isDot = true;
9703 break;
9704 case Intrinsic::ppc_altivec_vcmpgtsw_p:
9705 CompareOpc = 902;
9706 isDot = true;
9707 break;
9708 case Intrinsic::ppc_altivec_vcmpgtsd_p:
9709 if (Subtarget.hasP8Altivec()) {
9710 CompareOpc = 967;
9711 isDot = true;
9712 } else
9713 return false;
9714 break;
9715 case Intrinsic::ppc_altivec_vcmpgtub_p:
9716 CompareOpc = 518;
9717 isDot = true;
9718 break;
9719 case Intrinsic::ppc_altivec_vcmpgtuh_p:
9720 CompareOpc = 582;
9721 isDot = true;
9722 break;
9723 case Intrinsic::ppc_altivec_vcmpgtuw_p:
9724 CompareOpc = 646;
9725 isDot = true;
9726 break;
9727 case Intrinsic::ppc_altivec_vcmpgtud_p:
9728 if (Subtarget.hasP8Altivec()) {
9729 CompareOpc = 711;
9730 isDot = true;
9731 } else
9732 return false;
9733 break;
9734
9735 case Intrinsic::ppc_altivec_vcmpequq:
9736 case Intrinsic::ppc_altivec_vcmpgtsq:
9737 case Intrinsic::ppc_altivec_vcmpgtuq:
9738 if (!Subtarget.isISA3_1())
9739 return false;
9740 switch (IntrinsicID) {
9741 default:
9742 llvm_unreachable("Unknown comparison intrinsic.");
9743 case Intrinsic::ppc_altivec_vcmpequq:
9744 CompareOpc = 455;
9745 break;
9746 case Intrinsic::ppc_altivec_vcmpgtsq:
9747 CompareOpc = 903;
9748 break;
9749 case Intrinsic::ppc_altivec_vcmpgtuq:
9750 CompareOpc = 647;
9751 break;
9752 }
9753 break;
9754
9755 // VSX predicate comparisons use the same infrastructure
9756 case Intrinsic::ppc_vsx_xvcmpeqdp_p:
9757 case Intrinsic::ppc_vsx_xvcmpgedp_p:
9758 case Intrinsic::ppc_vsx_xvcmpgtdp_p:
9759 case Intrinsic::ppc_vsx_xvcmpeqsp_p:
9760 case Intrinsic::ppc_vsx_xvcmpgesp_p:
9761 case Intrinsic::ppc_vsx_xvcmpgtsp_p:
9762 if (Subtarget.hasVSX()) {
9763 switch (IntrinsicID) {
9764 case Intrinsic::ppc_vsx_xvcmpeqdp_p:
9765 CompareOpc = 99;
9766 break;
9767 case Intrinsic::ppc_vsx_xvcmpgedp_p:
9768 CompareOpc = 115;
9769 break;
9770 case Intrinsic::ppc_vsx_xvcmpgtdp_p:
9771 CompareOpc = 107;
9772 break;
9773 case Intrinsic::ppc_vsx_xvcmpeqsp_p:
9774 CompareOpc = 67;
9775 break;
9776 case Intrinsic::ppc_vsx_xvcmpgesp_p:
9777 CompareOpc = 83;
9778 break;
9779 case Intrinsic::ppc_vsx_xvcmpgtsp_p:
9780 CompareOpc = 75;
9781 break;
9782 }
9783 isDot = true;
9784 } else
9785 return false;
9786 break;
9787
9788 // Normal Comparisons.
9789 case Intrinsic::ppc_altivec_vcmpbfp:
9790 CompareOpc = 966;
9791 break;
9792 case Intrinsic::ppc_altivec_vcmpeqfp:
9793 CompareOpc = 198;
9794 break;
9795 case Intrinsic::ppc_altivec_vcmpequb:
9796 CompareOpc = 6;
9797 break;
9798 case Intrinsic::ppc_altivec_vcmpequh:
9799 CompareOpc = 70;
9800 break;
9801 case Intrinsic::ppc_altivec_vcmpequw:
9802 CompareOpc = 134;
9803 break;
9804 case Intrinsic::ppc_altivec_vcmpequd:
9805 if (Subtarget.hasP8Altivec())
9806 CompareOpc = 199;
9807 else
9808 return false;
9809 break;
9810 case Intrinsic::ppc_altivec_vcmpneb:
9811 case Intrinsic::ppc_altivec_vcmpneh:
9812 case Intrinsic::ppc_altivec_vcmpnew:
9813 case Intrinsic::ppc_altivec_vcmpnezb:
9814 case Intrinsic::ppc_altivec_vcmpnezh:
9815 case Intrinsic::ppc_altivec_vcmpnezw:
9816 if (Subtarget.hasP9Altivec())
9817 switch (IntrinsicID) {
9818 default:
9819 llvm_unreachable("Unknown comparison intrinsic.");
9820 case Intrinsic::ppc_altivec_vcmpneb:
9821 CompareOpc = 7;
9822 break;
9823 case Intrinsic::ppc_altivec_vcmpneh:
9824 CompareOpc = 71;
9825 break;
9826 case Intrinsic::ppc_altivec_vcmpnew:
9827 CompareOpc = 135;
9828 break;
9829 case Intrinsic::ppc_altivec_vcmpnezb:
9830 CompareOpc = 263;
9831 break;
9832 case Intrinsic::ppc_altivec_vcmpnezh:
9833 CompareOpc = 327;
9834 break;
9835 case Intrinsic::ppc_altivec_vcmpnezw:
9836 CompareOpc = 391;
9837 break;
9838 }
9839 else
9840 return false;
9841 break;
9842 case Intrinsic::ppc_altivec_vcmpgefp:
9843 CompareOpc = 454;
9844 break;
9845 case Intrinsic::ppc_altivec_vcmpgtfp:
9846 CompareOpc = 710;
9847 break;
9848 case Intrinsic::ppc_altivec_vcmpgtsb:
9849 CompareOpc = 774;
9850 break;
9851 case Intrinsic::ppc_altivec_vcmpgtsh:
9852 CompareOpc = 838;
9853 break;
9854 case Intrinsic::ppc_altivec_vcmpgtsw:
9855 CompareOpc = 902;
9856 break;
9857 case Intrinsic::ppc_altivec_vcmpgtsd:
9858 if (Subtarget.hasP8Altivec())
9859 CompareOpc = 967;
9860 else
9861 return false;
9862 break;
9863 case Intrinsic::ppc_altivec_vcmpgtub:
9864 CompareOpc = 518;
9865 break;
9866 case Intrinsic::ppc_altivec_vcmpgtuh:
9867 CompareOpc = 582;
9868 break;
9869 case Intrinsic::ppc_altivec_vcmpgtuw:
9870 CompareOpc = 646;
9871 break;
9872 case Intrinsic::ppc_altivec_vcmpgtud:
9873 if (Subtarget.hasP8Altivec())
9874 CompareOpc = 711;
9875 else
9876 return false;
9877 break;
9878 case Intrinsic::ppc_altivec_vcmpequq_p:
9879 case Intrinsic::ppc_altivec_vcmpgtsq_p:
9880 case Intrinsic::ppc_altivec_vcmpgtuq_p:
9881 if (!Subtarget.isISA3_1())
9882 return false;
9883 switch (IntrinsicID) {
9884 default:
9885 llvm_unreachable("Unknown comparison intrinsic.");
9886 case Intrinsic::ppc_altivec_vcmpequq_p:
9887 CompareOpc = 455;
9888 break;
9889 case Intrinsic::ppc_altivec_vcmpgtsq_p:
9890 CompareOpc = 903;
9891 break;
9892 case Intrinsic::ppc_altivec_vcmpgtuq_p:
9893 CompareOpc = 647;
9894 break;
9895 }
9896 isDot = true;
9897 break;
9898 }
9899 return true;
9900}
9901
9902/// LowerINTRINSIC_WO_CHAIN - If this is an intrinsic that we want to custom
9903/// lower, do it, otherwise return null.
9904SDValue PPCTargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
9905 SelectionDAG &DAG) const {
9906 unsigned IntrinsicID =
9907 cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
9908
9909 SDLoc dl(Op);
9910
9911 switch (IntrinsicID) {
9912 case Intrinsic::thread_pointer:
9913 // Reads the thread pointer register, used for __builtin_thread_pointer.
9914 if (Subtarget.isPPC64())
9915 return DAG.getRegister(PPC::X13, MVT::i64);
9916 return DAG.getRegister(PPC::R2, MVT::i32);
9917
9918 case Intrinsic::ppc_mma_disassemble_acc:
9919 case Intrinsic::ppc_vsx_disassemble_pair: {
9920 int NumVecs = 2;
9921 SDValue WideVec = Op.getOperand(1);
9922 if (IntrinsicID == Intrinsic::ppc_mma_disassemble_acc) {
9923 NumVecs = 4;
9924 WideVec = DAG.getNode(PPCISD::XXMFACC, dl, MVT::v512i1, WideVec);
9925 }
9926 SmallVector<SDValue, 4> RetOps;
9927 for (int VecNo = 0; VecNo < NumVecs; VecNo++) {
9928 SDValue Extract = DAG.getNode(
9929 PPCISD::EXTRACT_VSX_REG, dl, MVT::v16i8, WideVec,
9930 DAG.getConstant(Subtarget.isLittleEndian() ? NumVecs - 1 - VecNo
9931 : VecNo,
9932 dl, MVT::i64));
9933 RetOps.push_back(Extract);
9934 }
9935 return DAG.getMergeValues(RetOps, dl);
9936 }
9937 }
9938
9939 // If this is a lowered altivec predicate compare, CompareOpc is set to the
9940 // opcode number of the comparison.
9941 int CompareOpc;
9942 bool isDot;
9943 if (!getVectorCompareInfo(Op, CompareOpc, isDot, Subtarget))
9944 return SDValue(); // Don't custom lower most intrinsics.
9945
9946 // If this is a non-dot comparison, make the VCMP node and we are done.
9947 if (!isDot) {
9948 SDValue Tmp = DAG.getNode(PPCISD::VCMP, dl, Op.getOperand(2).getValueType(),
9949 Op.getOperand(1), Op.getOperand(2),
9950 DAG.getConstant(CompareOpc, dl, MVT::i32));
9951 return DAG.getNode(ISD::BITCAST, dl, Op.getValueType(), Tmp);
9952 }
9953
9954 // Create the PPCISD altivec 'dot' comparison node.
9955 SDValue Ops[] = {
9956 Op.getOperand(2), // LHS
9957 Op.getOperand(3), // RHS
9958 DAG.getConstant(CompareOpc, dl, MVT::i32)
9959 };
9960 EVT VTs[] = { Op.getOperand(2).getValueType(), MVT::Glue };
9961 SDValue CompNode = DAG.getNode(PPCISD::VCMP_rec, dl, VTs, Ops);
9962
9963 // Now that we have the comparison, emit a copy from the CR to a GPR.
9964 // This is flagged to the above dot comparison.
9965 SDValue Flags = DAG.getNode(PPCISD::MFOCRF, dl, MVT::i32,
9966 DAG.getRegister(PPC::CR6, MVT::i32),
9967 CompNode.getValue(1));
9968
9969 // Unpack the result based on how the target uses it.
9970 unsigned BitNo; // Bit # of CR6.
9971 bool InvertBit; // Invert result?
9972 switch (cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue()) {
9973 default: // Can't happen, don't crash on invalid number though.
9974 case 0: // Return the value of the EQ bit of CR6.
9975 BitNo = 0; InvertBit = false;
9976 break;
9977 case 1: // Return the inverted value of the EQ bit of CR6.
9978 BitNo = 0; InvertBit = true;
9979 break;
9980 case 2: // Return the value of the LT bit of CR6.
9981 BitNo = 2; InvertBit = false;
9982 break;
9983 case 3: // Return the inverted value of the LT bit of CR6.
9984 BitNo = 2; InvertBit = true;
9985 break;
9986 }
9987
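// After MFOCRF, CR6's four bits sit at bit positions 7..4 counting from the
// LSB (LT=7, GT=6, EQ=5, SO=4), so the shift amount 8 - (3 - BitNo) moves
// the requested bit (EQ for BitNo == 0, LT for BitNo == 2) down to bit 0.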
9988 // Shift the bit into the low position.
9989 Flags = DAG.getNode(ISD::SRL, dl, MVT::i32, Flags,
9990 DAG.getConstant(8 - (3 - BitNo), dl, MVT::i32));
9991 // Isolate the bit.
9992 Flags = DAG.getNode(ISD::AND, dl, MVT::i32, Flags,
9993 DAG.getConstant(1, dl, MVT::i32));
9994
9995 // If we are supposed to, toggle the bit.
9996 if (InvertBit)
9997 Flags = DAG.getNode(ISD::XOR, dl, MVT::i32, Flags,
9998 DAG.getConstant(1, dl, MVT::i32));
9999 return Flags;
10000}
10001
10002SDValue PPCTargetLowering::LowerINTRINSIC_VOID(SDValue Op,
10003 SelectionDAG &DAG) const {
10004 // SelectionDAGBuilder::visitTargetIntrinsic may insert one extra chain to
10005 // the beginning of the argument list.
10006 int ArgStart = isa<ConstantSDNode>(Op.getOperand(0)) ? 0 : 1;
10007 SDLoc DL(Op);
10008 switch (cast<ConstantSDNode>(Op.getOperand(ArgStart))->getZExtValue()) {
10009 case Intrinsic::ppc_cfence: {
10010 assert(ArgStart == 1 && "llvm.ppc.cfence must carry a chain argument.");
10011 assert(Subtarget.isPPC64() && "Only 64-bit is supported for now.");
10012 return SDValue(DAG.getMachineNode(PPC::CFENCE8, DL, MVT::Other,
10013 DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64,
10014 Op.getOperand(ArgStart + 1)),
10015 Op.getOperand(0)),
10016 0);
10017 }
10018 default:
10019 break;
10020 }
10021 return SDValue();
10022}
10023
10024// Lower scalar BSWAP64 to xxbrd.
10025SDValue PPCTargetLowering::LowerBSWAP(SDValue Op, SelectionDAG &DAG) const {
10026 SDLoc dl(Op);
10027 // MTVSRDD
10028 Op = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v2i64, Op.getOperand(0),
10029 Op.getOperand(0));
10030 // XXBRD
10031 Op = DAG.getNode(ISD::BSWAP, dl, MVT::v2i64, Op);
10032 // MFVSRD
10033 int VectorIndex = 0;
10034 if (Subtarget.isLittleEndian())
10035 VectorIndex = 1;
10036 Op = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::i64, Op,
10037 DAG.getTargetConstant(VectorIndex, dl, MVT::i32));
10038 return Op;
10039}
10040
10041// ATOMIC_CMP_SWAP for i8/i16 needs to zero-extend its input since it will be
10042// compared to a value that is atomically loaded (atomic loads zero-extend).
10043SDValue PPCTargetLowering::LowerATOMIC_CMP_SWAP(SDValue Op,
10044 SelectionDAG &DAG) const {
10045 assert(Op.getOpcode() == ISD::ATOMIC_CMP_SWAP &&
10046 "Expecting an atomic compare-and-swap here.");
10047 SDLoc dl(Op);
10048 auto *AtomicNode = cast<AtomicSDNode>(Op.getNode());
10049 EVT MemVT = AtomicNode->getMemoryVT();
10050 if (MemVT.getSizeInBits() >= 32)
10051 return Op;
10052
10053 SDValue CmpOp = Op.getOperand(2);
10054 // If this is already correctly zero-extended, leave it alone.
10055 auto HighBits = APInt::getHighBitsSet(32, 32 - MemVT.getSizeInBits());
10056 if (DAG.MaskedValueIsZero(CmpOp, HighBits))
10057 return Op;
10058
10059 // Clear the high bits of the compare operand.
10060 unsigned MaskVal = (1 << MemVT.getSizeInBits()) - 1;
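// For i8 this mask is (1 << 8) - 1 == 0xFF; for i16 it is 0xFFFF.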
10061 SDValue NewCmpOp =
10062 DAG.getNode(ISD::AND, dl, MVT::i32, CmpOp,
10063 DAG.getConstant(MaskVal, dl, MVT::i32));
10064
10065 // Replace the existing compare operand with the properly zero-extended one.
10066 SmallVector<SDValue, 4> Ops;
10067 for (int i = 0, e = AtomicNode->getNumOperands(); i < e; i++)
10068 Ops.push_back(AtomicNode->getOperand(i));
10069 Ops[2] = NewCmpOp;
10070 MachineMemOperand *MMO = AtomicNode->getMemOperand();
10071 SDVTList Tys = DAG.getVTList(MVT::i32, MVT::Other);
10072 auto NodeTy =
10073 (MemVT == MVT::i8) ? PPCISD::ATOMIC_CMP_SWAP_8 : PPCISD::ATOMIC_CMP_SWAP_16;
10074 return DAG.getMemIntrinsicNode(NodeTy, dl, Tys, Ops, MemVT, MMO);
10075}
10076
10077SDValue PPCTargetLowering::LowerSCALAR_TO_VECTOR(SDValue Op,
10078 SelectionDAG &DAG) const {
10079 SDLoc dl(Op);
10080 // Create a stack slot that is 16-byte aligned.
10081 MachineFrameInfo &MFI = DAG.getMachineFunction().getFrameInfo();
10082 int FrameIdx = MFI.CreateStackObject(16, Align(16), false);
10083 EVT PtrVT = getPointerTy(DAG.getDataLayout());
10084 SDValue FIdx = DAG.getFrameIndex(FrameIdx, PtrVT);
10085
10086 // Store the input value into Value#0 of the stack slot.
10087 SDValue Store = DAG.getStore(DAG.getEntryNode(), dl, Op.getOperand(0), FIdx,
10088 MachinePointerInfo());
10089 // Load it out.
10090 return DAG.getLoad(Op.getValueType(), dl, Store, FIdx, MachinePointerInfo());
10091}
10092
10093SDValue PPCTargetLowering::LowerINSERT_VECTOR_ELT(SDValue Op,
10094 SelectionDAG &DAG) const {
10095 assert(Op.getOpcode() == ISD::INSERT_VECTOR_ELT &&
10096 "Should only be called for ISD::INSERT_VECTOR_ELT");
10097
10098 ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op.getOperand(2));
10099 // We have legal lowering for constant indices but not for variable ones.
10100 if (!C)
10101 return SDValue();
10102
10103 EVT VT = Op.getValueType();
10104 SDLoc dl(Op);
10105 SDValue V1 = Op.getOperand(0);
10106 SDValue V2 = Op.getOperand(1);
10107 // We can use MTVSRZ + VECINSERT for v8i16 and v16i8 types.
10108 if (VT == MVT::v8i16 || VT == MVT::v16i8) {
10109 SDValue Mtvsrz = DAG.getNode(PPCISD::MTVSRZ, dl, VT, V2);
10110 unsigned BytesInEachElement = VT.getVectorElementType().getSizeInBits() / 8;
10111 unsigned InsertAtElement = C->getZExtValue();
10112 unsigned InsertAtByte = InsertAtElement * BytesInEachElement;
10113 if (Subtarget.isLittleEndian()) {
10114 InsertAtByte = (16 - BytesInEachElement) - InsertAtByte;
10115 }
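// For example, inserting element 1 of a v8i16 on little endian yields byte
// offset (16 - 2) - 2 == 12, the big-endian byte number VECINSERT expects.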
10116 return DAG.getNode(PPCISD::VECINSERT, dl, VT, V1, Mtvsrz,
10117 DAG.getConstant(InsertAtByte, dl, MVT::i32));
10118 }
10119 return Op;
10120}
10121
10122SDValue PPCTargetLowering::LowerVectorLoad(SDValue Op,
10123 SelectionDAG &DAG) const {
10124 SDLoc dl(Op);
10125 LoadSDNode *LN = cast<LoadSDNode>(Op.getNode());
10126 SDValue LoadChain = LN->getChain();
10127 SDValue BasePtr = LN->getBasePtr();
10128 EVT VT = Op.getValueType();
10129
10130 if (VT != MVT::v256i1 && VT != MVT::v512i1)
10131 return Op;
10132
10133 // Type v256i1 is used for pairs and v512i1 is used for accumulators.
10134 // Here we create 2 or 4 v16i8 loads to load the pair or accumulator value in
10135 // 2 or 4 vsx registers.
10136 assert((VT != MVT::v512i1 || Subtarget.hasMMA()) &&
10137 "Type unsupported without MMA");
10138 assert((VT != MVT::v256i1 || Subtarget.pairedVectorMemops()) &&
10139 "Type unsupported without paired vector support");
10140 Align Alignment = LN->getAlign();
10141 SmallVector<SDValue, 4> Loads;
10142 SmallVector<SDValue, 4> LoadChains;
10143 unsigned NumVecs = VT.getSizeInBits() / 128;
10144 for (unsigned Idx = 0; Idx < NumVecs; ++Idx) {
10145 SDValue Load =
10146 DAG.getLoad(MVT::v16i8, dl, LoadChain, BasePtr,
10147 LN->getPointerInfo().getWithOffset(Idx * 16),
10148 commonAlignment(Alignment, Idx * 16),
10149 LN->getMemOperand()->getFlags(), LN->getAAInfo());
10150 BasePtr = DAG.getNode(ISD::ADD, dl, BasePtr.getValueType(), BasePtr,
10151 DAG.getConstant(16, dl, BasePtr.getValueType()));
10152 Loads.push_back(Load);
10153 LoadChains.push_back(Load.getValue(1));
10154 }
10155 if (Subtarget.isLittleEndian()) {
10156 std::reverse(Loads.begin(), Loads.end());
10157 std::reverse(LoadChains.begin(), LoadChains.end());
10158 }
10159 SDValue TF = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, LoadChains);
10160 SDValue Value =
10161 DAG.getNode(VT == MVT::v512i1 ? PPCISD::ACC_BUILD : PPCISD::PAIR_BUILD,
10162 dl, VT, Loads);
10163 SDValue RetOps[] = {Value, TF};
10164 return DAG.getMergeValues(RetOps, dl);
10165}
10166
10167SDValue PPCTargetLowering::LowerVectorStore(SDValue Op,
10168 SelectionDAG &DAG) const {
10169 SDLoc dl(Op);
10170 StoreSDNode *SN = cast<StoreSDNode>(Op.getNode());
10171 SDValue StoreChain = SN->getChain();
10172 SDValue BasePtr = SN->getBasePtr();
10173 SDValue Value = SN->getValue();
10174 EVT StoreVT = Value.getValueType();
10175
10176 if (StoreVT != MVT::v256i1 && StoreVT != MVT::v512i1)
10177 return Op;
10178
10179 // Type v256i1 is used for pairs and v512i1 is used for accumulators.
10180 // Here we create 2 or 4 v16i8 stores to store the pair or accumulator
10181 // underlying registers individually.
10182 assert((StoreVT != MVT::v512i1 || Subtarget.hasMMA()) &&
10183 "Type unsupported without MMA");
10184 assert((StoreVT != MVT::v256i1 || Subtarget.pairedVectorMemops()) &&
10185 "Type unsupported without paired vector support");
10186 Align Alignment = SN->getAlign();
10187 SmallVector<SDValue, 4> Stores;
10188 unsigned NumVecs = 2;
10189 if (StoreVT == MVT::v512i1) {
10190 Value = DAG.getNode(PPCISD::XXMFACC, dl, MVT::v512i1, Value);
10191 NumVecs = 4;
10192 }
10193 for (unsigned Idx = 0; Idx < NumVecs; ++Idx) {
10194 unsigned VecNum = Subtarget.isLittleEndian() ? NumVecs - 1 - Idx : Idx;
10195 SDValue Elt = DAG.getNode(PPCISD::EXTRACT_VSX_REG, dl, MVT::v16i8, Value,
10196 DAG.getConstant(VecNum, dl, MVT::i64));
10197 SDValue Store =
10198 DAG.getStore(StoreChain, dl, Elt, BasePtr,
10199 SN->getPointerInfo().getWithOffset(Idx * 16),
10200 commonAlignment(Alignment, Idx * 16),
10201 SN->getMemOperand()->getFlags(), SN->getAAInfo());
10202 BasePtr = DAG.getNode(ISD::ADD, dl, BasePtr.getValueType(), BasePtr,
10203 DAG.getConstant(16, dl, BasePtr.getValueType()));
10204 Stores.push_back(Store);
10205 }
10206 SDValue TF = DAG.getTokenFactor(dl, Stores);
10207 return TF;
10208}
10209
10210SDValue PPCTargetLowering::LowerMUL(SDValue Op, SelectionDAG &DAG) const {
10211 SDLoc dl(Op);
10212 if (Op.getValueType() == MVT::v4i32) {
10213 SDValue LHS = Op.getOperand(0), RHS = Op.getOperand(1);
10214
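// In each 32-bit lane, x*y (mod 2^32) = xlo*ylo + ((xhi*ylo + xlo*yhi) << 16):
// vmulouh below produces the xlo*ylo products, and vmsumuhm against the
// halfword-swapped RHS produces the xhi*ylo + xlo*yhi cross terms.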
10215 SDValue Zero = getCanonicalConstSplat(0, 1, MVT::v4i32, DAG, dl);
10216 // +16 as shift amt.
10217 SDValue Neg16 = getCanonicalConstSplat(-16, 4, MVT::v4i32, DAG, dl);
10218 SDValue RHSSwap = // = vrlw RHS, 16
10219 BuildIntrinsicOp(Intrinsic::ppc_altivec_vrlw, RHS, Neg16, DAG, dl);
10220
10221 // Shrinkify inputs to v8i16.
10222 LHS = DAG.getNode(ISD::BITCAST, dl, MVT::v8i16, LHS);
10223 RHS = DAG.getNode(ISD::BITCAST, dl, MVT::v8i16, RHS);
10224 RHSSwap = DAG.getNode(ISD::BITCAST, dl, MVT::v8i16, RHSSwap);
10225
10226 // Low parts multiplied together, generating 32-bit results (we ignore the
10227 // top parts).
10228 SDValue LoProd = BuildIntrinsicOp(Intrinsic::ppc_altivec_vmulouh,
10229 LHS, RHS, DAG, dl, MVT::v4i32);
10230
10231 SDValue HiProd = BuildIntrinsicOp(Intrinsic::ppc_altivec_vmsumuhm,
10232 LHS, RHSSwap, Zero, DAG, dl, MVT::v4i32);
10233 // Shift the high parts up 16 bits.
10234 HiProd = BuildIntrinsicOp(Intrinsic::ppc_altivec_vslw, HiProd,
10235 Neg16, DAG, dl);
10236 return DAG.getNode(ISD::ADD, dl, MVT::v4i32, LoProd, HiProd);
10237 } else if (Op.getValueType() == MVT::v16i8) {
10238 SDValue LHS = Op.getOperand(0), RHS = Op.getOperand(1);
10239 bool isLittleEndian = Subtarget.isLittleEndian();
10240
10241 // Multiply the even 8-bit parts, producing 16-bit sums.
10242 SDValue EvenParts = BuildIntrinsicOp(Intrinsic::ppc_altivec_vmuleub,
10243 LHS, RHS, DAG, dl, MVT::v8i16);
10244 EvenParts = DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, EvenParts);
10245
10246 // Multiply the odd 8-bit parts, producing 16-bit sums.
10247 SDValue OddParts = BuildIntrinsicOp(Intrinsic::ppc_altivec_vmuloub,
10248 LHS, RHS, DAG, dl, MVT::v8i16);
10249 OddParts = DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, OddParts);
10250
10251 // Merge the results together. Because vmuleub and vmuloub are
10252 // instructions with a big-endian bias, we must reverse the
10253 // element numbering and reverse the meaning of "odd" and "even"
10254 // when generating little endian code.
10255 int Ops[16];
10256 for (unsigned i = 0; i != 8; ++i) {
10257 if (isLittleEndian) {
10258 Ops[i*2 ] = 2*i;
10259 Ops[i*2+1] = 2*i+16;
10260 } else {
10261 Ops[i*2 ] = 2*i+1;
10262 Ops[i*2+1] = 2*i+1+16;
10263 }
10264 }
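// For big endian this builds the mask <1,17,3,19,...>, interleaving the
// low-order (odd-numbered) bytes of the even and odd products; on little
// endian the even-numbered bytes are the low-order ones, so the operands
// and byte indices are swapped accordingly.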
10265 if (isLittleEndian)
10266 return DAG.getVectorShuffle(MVT::v16i8, dl, OddParts, EvenParts, Ops);
10267 else
10268 return DAG.getVectorShuffle(MVT::v16i8, dl, EvenParts, OddParts, Ops);
10269 } else {
10270 llvm_unreachable("Unknown mul to lower!");
10271 }
10272}
10273
10274SDValue PPCTargetLowering::LowerFP_ROUND(SDValue Op, SelectionDAG &DAG) const {
10275 bool IsStrict = Op->isStrictFPOpcode();
10276 if (Op.getOperand(IsStrict ? 1 : 0).getValueType() == MVT::f128 &&
10277 !Subtarget.hasP9Vector())
10278 return SDValue();
10279
10280 return Op;
10281}
10282
10283 // Custom lowering for fpext v2f32 to v2f64
10284SDValue PPCTargetLowering::LowerFP_EXTEND(SDValue Op, SelectionDAG &DAG) const {
10285
10286 assert(Op.getOpcode() == ISD::FP_EXTEND &&
10287 "Should only be called for ISD::FP_EXTEND");
10288
10289 // FIXME: handle extends from half precision float vectors on P9.
10290 // We only want to custom lower an extend from v2f32 to v2f64.
10291 if (Op.getValueType() != MVT::v2f64 ||
10292 Op.getOperand(0).getValueType() != MVT::v2f32)
10293 return SDValue();
10294
10295 SDLoc dl(Op);
10296 SDValue Op0 = Op.getOperand(0);
10297
10298 switch (Op0.getOpcode()) {
10299 default:
10300 return SDValue();
10301 case ISD::EXTRACT_SUBVECTOR: {
10302 assert(Op0.getNumOperands() == 2 &&
10303 isa<ConstantSDNode>(Op0->getOperand(1)) &&
10304 "Node should have 2 operands with second one being a constant!");
10305
10306 if (Op0.getOperand(0).getValueType() != MVT::v4f32)
10307 return SDValue();
10308
10309 // Custom lower is only done for high or low doubleword.
10310 int Idx = cast<ConstantSDNode>(Op0.getOperand(1))->getZExtValue();
10311 if (Idx % 2 != 0)
10312 return SDValue();
10313
10314 // Since input is v4f32, at this point Idx is either 0 or 2.
10315 // Shift to get the doubleword position we want.
10316 int DWord = Idx >> 1;
10317
10318 // High and low word positions are different on little endian.
10319 if (Subtarget.isLittleEndian())
10320 DWord ^= 0x1;
10321
10322 return DAG.getNode(PPCISD::FP_EXTEND_HALF, dl, MVT::v2f64,
10323 Op0.getOperand(0), DAG.getConstant(DWord, dl, MVT::i32));
10324 }
10325 case ISD::FADD:
10326 case ISD::FMUL:
10327 case ISD::FSUB: {
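// Instead of extending the narrow result of the arithmetic, reload both
// operands as v4f32 via LD_VSX_LH, redo the operation at the wider type,
// and let FP_EXTEND_HALF widen the relevant half of the new result.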
10328 SDValue NewLoad[2];
10329 for (unsigned i = 0, ie = Op0.getNumOperands(); i != ie; ++i) {
10330 // Ensure both input are loads.
10331 SDValue LdOp = Op0.getOperand(i);
10332 if (LdOp.getOpcode() != ISD::LOAD)
10333 return SDValue();
10334 // Generate new load node.
10335 LoadSDNode *LD = cast<LoadSDNode>(LdOp);
10336 SDValue LoadOps[] = {LD->getChain(), LD->getBasePtr()};
10337 NewLoad[i] = DAG.getMemIntrinsicNode(
10338 PPCISD::LD_VSX_LH, dl, DAG.getVTList(MVT::v4f32, MVT::Other), LoadOps,
10339 LD->getMemoryVT(), LD->getMemOperand());
10340 }
10341 SDValue NewOp =
10342 DAG.getNode(Op0.getOpcode(), SDLoc(Op0), MVT::v4f32, NewLoad[0],
10343 NewLoad[1], Op0.getNode()->getFlags());
10344 return DAG.getNode(PPCISD::FP_EXTEND_HALF, dl, MVT::v2f64, NewOp,
10345 DAG.getConstant(0, dl, MVT::i32));
10346 }
10347 case ISD::LOAD: {
10348 LoadSDNode *LD = cast<LoadSDNode>(Op0);
10349 SDValue LoadOps[] = {LD->getChain(), LD->getBasePtr()};
10350 SDValue NewLd = DAG.getMemIntrinsicNode(
10351 PPCISD::LD_VSX_LH, dl, DAG.getVTList(MVT::v4f32, MVT::Other), LoadOps,
10352 LD->getMemoryVT(), LD->getMemOperand());
10353 return DAG.getNode(PPCISD::FP_EXTEND_HALF, dl, MVT::v2f64, NewLd,
10354 DAG.getConstant(0, dl, MVT::i32));
10355 }
10356 }
10357 llvm_unreachable("ERROR:Should return for all cases within swtich.");
10358}
10359
10360/// LowerOperation - Provide custom lowering hooks for some operations.
10361///
10362 SDValue PPCTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
10363 switch (Op.getOpcode()) {
10364 default: llvm_unreachable("Wasn't expecting to be able to lower this!");
10365 case ISD::ConstantPool: return LowerConstantPool(Op, DAG);
10366 case ISD::BlockAddress: return LowerBlockAddress(Op, DAG);
10367 case ISD::GlobalAddress: return LowerGlobalAddress(Op, DAG);
10368 case ISD::GlobalTLSAddress: return LowerGlobalTLSAddress(Op, DAG);
10369 case ISD::JumpTable: return LowerJumpTable(Op, DAG);
10370 case ISD::SETCC: return LowerSETCC(Op, DAG);
10371 case ISD::INIT_TRAMPOLINE: return LowerINIT_TRAMPOLINE(Op, DAG);
10372 case ISD::ADJUST_TRAMPOLINE: return LowerADJUST_TRAMPOLINE(Op, DAG);
10373
10374 case ISD::INLINEASM:
10375 case ISD::INLINEASM_BR: return LowerINLINEASM(Op, DAG);
10376 // Variable argument lowering.
10377 case ISD::VASTART: return LowerVASTART(Op, DAG);
10378 case ISD::VAARG: return LowerVAARG(Op, DAG);
10379 case ISD::VACOPY: return LowerVACOPY(Op, DAG);
10380
10381 case ISD::STACKRESTORE: return LowerSTACKRESTORE(Op, DAG);
10382 case ISD::DYNAMIC_STACKALLOC: return LowerDYNAMIC_STACKALLOC(Op, DAG);
10383 case ISD::GET_DYNAMIC_AREA_OFFSET:
10384 return LowerGET_DYNAMIC_AREA_OFFSET(Op, DAG);
10385
10386 // Exception handling lowering.
10387 case ISD::EH_DWARF_CFA: return LowerEH_DWARF_CFA(Op, DAG);
10388 case ISD::EH_SJLJ_SETJMP: return lowerEH_SJLJ_SETJMP(Op, DAG);
10389 case ISD::EH_SJLJ_LONGJMP: return lowerEH_SJLJ_LONGJMP(Op, DAG);
10390
10391 case ISD::LOAD: return LowerLOAD(Op, DAG);
10392 case ISD::STORE: return LowerSTORE(Op, DAG);
10393 case ISD::TRUNCATE: return LowerTRUNCATE(Op, DAG);
10394 case ISD::SELECT_CC: return LowerSELECT_CC(Op, DAG);
10395 case ISD::STRICT_FP_TO_UINT:
10396 case ISD::STRICT_FP_TO_SINT:
10397 case ISD::FP_TO_UINT:
10398 case ISD::FP_TO_SINT: return LowerFP_TO_INT(Op, DAG, SDLoc(Op));
10399 case ISD::STRICT_UINT_TO_FP:
10400 case ISD::STRICT_SINT_TO_FP:
10401 case ISD::UINT_TO_FP:
10402 case ISD::SINT_TO_FP: return LowerINT_TO_FP(Op, DAG);
10403 case ISD::FLT_ROUNDS_: return LowerFLT_ROUNDS_(Op, DAG);
10404
10405 // Lower 64-bit shifts.
10406 case ISD::SHL_PARTS: return LowerSHL_PARTS(Op, DAG);
10407 case ISD::SRL_PARTS: return LowerSRL_PARTS(Op, DAG);
10408 case ISD::SRA_PARTS: return LowerSRA_PARTS(Op, DAG);
10409
10410 case ISD::FSHL: return LowerFunnelShift(Op, DAG);
10411 case ISD::FSHR: return LowerFunnelShift(Op, DAG);
10412
10413 // Vector-related lowering.
10414 case ISD::BUILD_VECTOR: return LowerBUILD_VECTOR(Op, DAG);
10415 case ISD::VECTOR_SHUFFLE: return LowerVECTOR_SHUFFLE(Op, DAG);
10416 case ISD::INTRINSIC_WO_CHAIN: return LowerINTRINSIC_WO_CHAIN(Op, DAG);
10417 case ISD::SCALAR_TO_VECTOR: return LowerSCALAR_TO_VECTOR(Op, DAG);
10418 case ISD::INSERT_VECTOR_ELT: return LowerINSERT_VECTOR_ELT(Op, DAG);
10419 case ISD::MUL: return LowerMUL(Op, DAG);
10420 case ISD::FP_EXTEND: return LowerFP_EXTEND(Op, DAG);
10421 case ISD::STRICT_FP_ROUND:
10422 case ISD::FP_ROUND:
10423 return LowerFP_ROUND(Op, DAG);
10424 case ISD::ROTL: return LowerROTL(Op, DAG);
10425
10426 // For counter-based loop handling.
10427 case ISD::INTRINSIC_W_CHAIN: return SDValue();
10428
10429 case ISD::BITCAST: return LowerBITCAST(Op, DAG);
10430
10431 // Frame & Return address.
10432 case ISD::RETURNADDR: return LowerRETURNADDR(Op, DAG);
10433 case ISD::FRAMEADDR: return LowerFRAMEADDR(Op, DAG);
10434
10435 case ISD::INTRINSIC_VOID:
10436 return LowerINTRINSIC_VOID(Op, DAG);
10437 case ISD::BSWAP:
10438 return LowerBSWAP(Op, DAG);
10439 case ISD::ATOMIC_CMP_SWAP:
10440 return LowerATOMIC_CMP_SWAP(Op, DAG);
10441 }
10442}
10443
10444 void PPCTargetLowering::ReplaceNodeResults(SDNode *N,
10445 SmallVectorImpl<SDValue> &Results,
10446 SelectionDAG &DAG) const {
10447 SDLoc dl(N);
10448 switch (N->getOpcode()) {
10449 default:
10450 llvm_unreachable("Do not know how to custom type legalize this operation!");
10451 case ISD::READCYCLECOUNTER: {
10452 SDVTList VTs = DAG.getVTList(MVT::i32, MVT::i32, MVT::Other);
10453 SDValue RTB = DAG.getNode(PPCISD::READ_TIME_BASE, dl, VTs, N->getOperand(0));
10454
10455 Results.push_back(
10456 DAG.getNode(ISD::BUILD_PAIR, dl, MVT::i64, RTB, RTB.getValue(1)));
10457 Results.push_back(RTB.getValue(2));
10458 break;
10459 }
10460 case ISD::INTRINSIC_W_CHAIN: {
10461 if (cast<ConstantSDNode>(N->getOperand(1))->getZExtValue() !=
10462 Intrinsic::loop_decrement)
10463 break;
10464
10465 assert(N->getValueType(0) == MVT::i1 &&
10466 "Unexpected result type for CTR decrement intrinsic");
10467 EVT SVT = getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(),
10468 N->getValueType(0));
10469 SDVTList VTs = DAG.getVTList(SVT, MVT::Other);
10470 SDValue NewInt = DAG.getNode(N->getOpcode(), dl, VTs, N->getOperand(0),
10471 N->getOperand(1));
10472
10473 Results.push_back(DAG.getNode(ISD::TRUNCATE, dl, MVT::i1, NewInt));
10474 Results.push_back(NewInt.getValue(1));
10475 break;
10476 }
10477 case ISD::VAARG: {
10478 if (!Subtarget.isSVR4ABI() || Subtarget.isPPC64())
10479 return;
10480
10481 EVT VT = N->getValueType(0);
10482
10483 if (VT == MVT::i64) {
10484 SDValue NewNode = LowerVAARG(SDValue(N, 1), DAG);
10485
10486 Results.push_back(NewNode);
10487 Results.push_back(NewNode.getValue(1));
10488 }
10489 return;
10490 }
10491 case ISD::STRICT_FP_TO_SINT:
10492 case ISD::STRICT_FP_TO_UINT:
10493 case ISD::FP_TO_SINT:
10494 case ISD::FP_TO_UINT:
10495 // LowerFP_TO_INT() can only handle f32 and f64.
10496 if (N->getOperand(N->isStrictFPOpcode() ? 1 : 0).getValueType() ==
10497 MVT::ppcf128)
10498 return;
10499 Results.push_back(LowerFP_TO_INT(SDValue(N, 0), DAG, dl));
10500 return;
10501 case ISD::TRUNCATE: {
10502 if (!N->getValueType(0).isVector())
10503 return;
10504 SDValue Lowered = LowerTRUNCATEVector(SDValue(N, 0), DAG);
10505 if (Lowered)
10506 Results.push_back(Lowered);
10507 return;
10508 }
10509 case ISD::FSHL:
10510 case ISD::FSHR:
10511 // Don't handle funnel shifts here.
10512 return;
10513 case ISD::BITCAST:
10514 // Don't handle bitcast here.
10515 return;
10516 case ISD::FP_EXTEND:
10517 SDValue Lowered = LowerFP_EXTEND(SDValue(N, 0), DAG);
10518 if (Lowered)
10519 Results.push_back(Lowered);
10520 return;
10521 }
10522}
10523
10524//===----------------------------------------------------------------------===//
10525// Other Lowering Code
10526//===----------------------------------------------------------------------===//
10527
10528 static Instruction *callIntrinsic(IRBuilder<> &Builder, Intrinsic::ID Id) {
10529 Module *M = Builder.GetInsertBlock()->getParent()->getParent();
10530 Function *Func = Intrinsic::getDeclaration(M, Id);
10531 return Builder.CreateCall(Func, {});
10532}
10533
10534 // The mappings for emitLeading/TrailingFence are taken from
10535 // http://www.cl.cam.ac.uk/~pes20/cpp/cpp0xmappings.html
10536 Instruction *PPCTargetLowering::emitLeadingFence(IRBuilder<> &Builder,
10537 Instruction *Inst,
10538 AtomicOrdering Ord) const {
10539 if (Ord == AtomicOrdering::SequentiallyConsistent)
10540 return callIntrinsic(Builder, Intrinsic::ppc_sync);
10541 if (isReleaseOrStronger(Ord))
10542 return callIntrinsic(Builder, Intrinsic::ppc_lwsync);
10543 return nullptr;
10544}
10545
10546 Instruction *PPCTargetLowering::emitTrailingFence(IRBuilder<> &Builder,
10547 Instruction *Inst,
10548 AtomicOrdering Ord) const {
10549 if (Inst->hasAtomicLoad() && isAcquireOrStronger(Ord)) {
10550 // See http://www.cl.cam.ac.uk/~pes20/cpp/cpp0xmappings.html and
10551 // http://www.rdrop.com/users/paulmck/scalability/paper/N2745r.2011.03.04a.html
10552 // and http://www.cl.cam.ac.uk/~pes20/cppppc/ for justification.
10553 if (isa<LoadInst>(Inst) && Subtarget.isPPC64())
10554 return Builder.CreateCall(
10555 Intrinsic::getDeclaration(
10556 Builder.GetInsertBlock()->getParent()->getParent(),
10557 Intrinsic::ppc_cfence, {Inst->getType()}),
10558 {Inst});
10559 // FIXME: Can use isync for rmw operation.
10560 return callIntrinsic(Builder, Intrinsic::ppc_lwsync);
10561 }
10562 return nullptr;
10563}
10564
10565 MachineBasicBlock *
10566 PPCTargetLowering::EmitAtomicBinary(MachineInstr &MI, MachineBasicBlock *BB,
10567 unsigned AtomicSize,
10568 unsigned BinOpcode,
10569 unsigned CmpOpcode,
10570 unsigned CmpPred) const {
10571 // This also handles ATOMIC_SWAP, indicated by BinOpcode==0.
10572 const TargetInstrInfo *TII = Subtarget.getInstrInfo();
10573
10574 auto LoadMnemonic = PPC::LDARX;
10575 auto StoreMnemonic = PPC::STDCX;
10576 switch (AtomicSize) {
10577 default:
10578 llvm_unreachable("Unexpected size of atomic entity");
10579 case 1:
10580 LoadMnemonic = PPC::LBARX;
10581 StoreMnemonic = PPC::STBCX;
10582 assert(Subtarget.hasPartwordAtomics() && "Call this only with partword atomics support");
10583 break;
10584 case 2:
10585 LoadMnemonic = PPC::LHARX;
10586 StoreMnemonic = PPC::STHCX;
10587 assert(Subtarget.hasPartwordAtomics() && "Call this only with partword atomics support");
10588 break;
10589 case 4:
10590 LoadMnemonic = PPC::LWARX;
10591 StoreMnemonic = PPC::STWCX;
10592 break;
10593 case 8:
10594 LoadMnemonic = PPC::LDARX;
10595 StoreMnemonic = PPC::STDCX;
10596 break;
10597 }
10598
10599 const BasicBlock *LLVM_BB = BB->getBasicBlock();
10600 MachineFunction *F = BB->getParent();
10601 MachineFunction::iterator It = ++BB->getIterator();
10602
10603 Register dest = MI.getOperand(0).getReg();
10604 Register ptrA = MI.getOperand(1).getReg();
10605 Register ptrB = MI.getOperand(2).getReg();
10606 Register incr = MI.getOperand(3).getReg();
10607 DebugLoc dl = MI.getDebugLoc();
10608
10609 MachineBasicBlock *loopMBB = F->CreateMachineBasicBlock(LLVM_BB);
10610 MachineBasicBlock *loop2MBB =
10611 CmpOpcode ? F->CreateMachineBasicBlock(LLVM_BB) : nullptr;
10612 MachineBasicBlock *exitMBB = F->CreateMachineBasicBlock(LLVM_BB);
10613 F->insert(It, loopMBB);
10614 if (CmpOpcode)
10615 F->insert(It, loop2MBB);
10616 F->insert(It, exitMBB);
10617 exitMBB->splice(exitMBB->begin(), BB,
10618 std::next(MachineBasicBlock::iterator(MI)), BB->end());
10619 exitMBB->transferSuccessorsAndUpdatePHIs(BB);
10620
10621 MachineRegisterInfo &RegInfo = F->getRegInfo();
10622 Register TmpReg = (!BinOpcode) ? incr :
10623 RegInfo.createVirtualRegister( AtomicSize == 8 ? &PPC::G8RCRegClass
10624 : &PPC::GPRCRegClass);
10625
10626 // thisMBB:
10627 // ...
10628 // fallthrough --> loopMBB
10629 BB->addSuccessor(loopMBB);
10630
10631 // loopMBB:
10632 // l[wd]arx dest, ptr
10633 // add r0, dest, incr
10634 // st[wd]cx. r0, ptr
10635 // bne- loopMBB
10636 // fallthrough --> exitMBB
10637
10638 // For max/min...
10639 // loopMBB:
10640 // l[wd]arx dest, ptr
10641 // cmpl?[wd] incr, dest
10642 // bgt exitMBB
10643 // loop2MBB:
10644 // st[wd]cx. dest, ptr
10645 // bne- loopMBB
10646 // fallthrough --> exitMBB
10647
10648 BB = loopMBB;
10649 BuildMI(BB, dl, TII->get(LoadMnemonic), dest)
10650 .addReg(ptrA).addReg(ptrB);
10651 if (BinOpcode)
10652 BuildMI(BB, dl, TII->get(BinOpcode), TmpReg).addReg(incr).addReg(dest);
10653 if (CmpOpcode) {
10654 // Signed comparisons of byte or halfword values must be sign-extended.
10655 if (CmpOpcode == PPC::CMPW && AtomicSize < 4) {
10656 Register ExtReg = RegInfo.createVirtualRegister(&PPC::GPRCRegClass);
10657 BuildMI(BB, dl, TII->get(AtomicSize == 1 ? PPC::EXTSB : PPC::EXTSH),
10658 ExtReg).addReg(dest);
10659 BuildMI(BB, dl, TII->get(CmpOpcode), PPC::CR0)
10660 .addReg(incr).addReg(ExtReg);
10661 } else
10662 BuildMI(BB, dl, TII->get(CmpOpcode), PPC::CR0)
10663 .addReg(incr).addReg(dest);
10664
10665 BuildMI(BB, dl, TII->get(PPC::BCC))
10666 .addImm(CmpPred).addReg(PPC::CR0).addMBB(exitMBB);
10667 BB->addSuccessor(loop2MBB);
10668 BB->addSuccessor(exitMBB);
10669 BB = loop2MBB;
10670 }
10671 BuildMI(BB, dl, TII->get(StoreMnemonic))
10672 .addReg(TmpReg).addReg(ptrA).addReg(ptrB);
10673 BuildMI(BB, dl, TII->get(PPC::BCC))
10674 .addImm(PPC::PRED_NE).addReg(PPC::CR0).addMBB(loopMBB);
10675 BB->addSuccessor(loopMBB);
10676 BB->addSuccessor(exitMBB);
10677
10678 // exitMBB:
10679 // ...
10680 BB = exitMBB;
10681 return BB;
10682}
10683
10684 static bool isSignExtended(MachineInstr &MI, const PPCInstrInfo *TII) {
10685 switch(MI.getOpcode()) {
10686 default:
10687 return false;
10688 case PPC::COPY:
10689 return TII->isSignExtended(MI);
10690 case PPC::LHA:
10691 case PPC::LHA8:
10692 case PPC::LHAU:
10693 case PPC::LHAU8:
10694 case PPC::LHAUX:
10695 case PPC::LHAUX8:
10696 case PPC::LHAX:
10697 case PPC::LHAX8:
10698 case PPC::LWA:
10699 case PPC::LWAUX:
10700 case PPC::LWAX:
10701 case PPC::LWAX_32:
10702 case PPC::LWA_32:
10703 case PPC::PLHA:
10704 case PPC::PLHA8:
10705 case PPC::PLHA8pc:
10706 case PPC::PLHApc:
10707 case PPC::PLWA:
10708 case PPC::PLWA8:
10709 case PPC::PLWA8pc:
10710 case PPC::PLWApc:
10711 case PPC::EXTSB:
10712 case PPC::EXTSB8:
10713 case PPC::EXTSB8_32_64:
10714 case PPC::EXTSB8_rec:
10715 case PPC::EXTSB_rec:
10716 case PPC::EXTSH:
10717 case PPC::EXTSH8:
10718 case PPC::EXTSH8_32_64:
10719 case PPC::EXTSH8_rec:
10720 case PPC::EXTSH_rec:
10721 case PPC::EXTSW:
10722 case PPC::EXTSWSLI:
10723 case PPC::EXTSWSLI_32_64:
10724 case PPC::EXTSWSLI_32_64_rec:
10725 case PPC::EXTSWSLI_rec:
10726 case PPC::EXTSW_32:
10727 case PPC::EXTSW_32_64:
10728 case PPC::EXTSW_32_64_rec:
10729 case PPC::EXTSW_rec:
10730 case PPC::SRAW:
10731 case PPC::SRAWI:
10732 case PPC::SRAWI_rec:
10733 case PPC::SRAW_rec:
10734 return true;
10735 }
10736 return false;
10737}
10738
10739 MachineBasicBlock *PPCTargetLowering::EmitPartwordAtomicBinary(
10740 MachineInstr &MI, MachineBasicBlock *BB,
10741 bool is8bit, // operation
10742 unsigned BinOpcode, unsigned CmpOpcode, unsigned CmpPred) const {
10743 // This also handles ATOMIC_SWAP, indicated by BinOpcode==0.
10744 const PPCInstrInfo *TII = Subtarget.getInstrInfo();
10745
10746 // If this is a signed comparison and the value being compared is not known
10747 // to be sign extended, sign extend it here.
10748 DebugLoc dl = MI.getDebugLoc();
10749 MachineFunction *F = BB->getParent();
10750 MachineRegisterInfo &RegInfo = F->getRegInfo();
10751 Register incr = MI.getOperand(3).getReg();
10752 bool IsSignExtended = Register::isVirtualRegister(incr) &&
10753 isSignExtended(*RegInfo.getVRegDef(incr), TII);
10754
10755 if (CmpOpcode == PPC::CMPW && !IsSignExtended) {
10756 Register ValueReg = RegInfo.createVirtualRegister(&PPC::GPRCRegClass);
10757 BuildMI(*BB, MI, dl, TII->get(is8bit ? PPC::EXTSB : PPC::EXTSH), ValueReg)
10758 .addReg(MI.getOperand(3).getReg());
10759 MI.getOperand(3).setReg(ValueReg);
10760 }
10761 // If we support part-word atomic mnemonics, just use them
10762 if (Subtarget.hasPartwordAtomics())
10763 return EmitAtomicBinary(MI, BB, is8bit ? 1 : 2, BinOpcode, CmpOpcode,
10764 CmpPred);
10765
10766 // In 64 bit mode we have to use 64 bits for addresses, even though the
10767 // lwarx/stwcx are 32 bits. With the 32-bit atomics we can use address
10768 // registers without caring whether they're 32 or 64, but here we're
10769 // doing actual arithmetic on the addresses.
10770 bool is64bit = Subtarget.isPPC64();
10771 bool isLittleEndian = Subtarget.isLittleEndian();
10772 unsigned ZeroReg = is64bit ? PPC::ZERO8 : PPC::ZERO;
10773
10774 const BasicBlock *LLVM_BB = BB->getBasicBlock();
10775 MachineFunction::iterator It = ++BB->getIterator();
10776
10777 Register dest = MI.getOperand(0).getReg();
10778 Register ptrA = MI.getOperand(1).getReg();
10779 Register ptrB = MI.getOperand(2).getReg();
10780
10781 MachineBasicBlock *loopMBB = F->CreateMachineBasicBlock(LLVM_BB);
10782 MachineBasicBlock *loop2MBB =
10783 CmpOpcode ? F->CreateMachineBasicBlock(LLVM_BB) : nullptr;
10784 MachineBasicBlock *exitMBB = F->CreateMachineBasicBlock(LLVM_BB);
10785 F->insert(It, loopMBB);
10786 if (CmpOpcode)
10787 F->insert(It, loop2MBB);
10788 F->insert(It, exitMBB);
10789 exitMBB->splice(exitMBB->begin(), BB,
10790 std::next(MachineBasicBlock::iterator(MI)), BB->end());
10791 exitMBB->transferSuccessorsAndUpdatePHIs(BB);
10792
10793 const TargetRegisterClass *RC =
10794 is64bit ? &PPC::G8RCRegClass : &PPC::GPRCRegClass;
10795 const TargetRegisterClass *GPRC = &PPC::GPRCRegClass;
10796
10797 Register PtrReg = RegInfo.createVirtualRegister(RC);
10798 Register Shift1Reg = RegInfo.createVirtualRegister(GPRC);
10799 Register ShiftReg =
10800 isLittleEndian ? Shift1Reg : RegInfo.createVirtualRegister(GPRC);
10801 Register Incr2Reg = RegInfo.createVirtualRegister(GPRC);
10802 Register MaskReg = RegInfo.createVirtualRegister(GPRC);
10803 Register Mask2Reg = RegInfo.createVirtualRegister(GPRC);
10804 Register Mask3Reg = RegInfo.createVirtualRegister(GPRC);
10805 Register Tmp2Reg = RegInfo.createVirtualRegister(GPRC);
10806 Register Tmp3Reg = RegInfo.createVirtualRegister(GPRC);
10807 Register Tmp4Reg = RegInfo.createVirtualRegister(GPRC);
10808 Register TmpDestReg = RegInfo.createVirtualRegister(GPRC);
10809 Register Ptr1Reg;
10810 Register TmpReg =
10811 (!BinOpcode) ? Incr2Reg : RegInfo.createVirtualRegister(GPRC);
10812
10813 // thisMBB:
10814 // ...
10815 // fallthrough --> loopMBB
10816 BB->addSuccessor(loopMBB);
10817
10818 // The 4-byte load must be aligned, while a char or short may be
10819 // anywhere in the word. Hence all this nasty bookkeeping code.
10820 // add ptr1, ptrA, ptrB [copy if ptrA==0]
10821 // rlwinm shift1, ptr1, 3, 27, 28 [3, 27, 27]
10822 // xori shift, shift1, 24 [16]
10823 // rlwinm ptr, ptr1, 0, 0, 29
10824 // slw incr2, incr, shift
10825 // li mask2, 255 [li mask3, 0; ori mask2, mask3, 65535]
10826 // slw mask, mask2, shift
10827 // loopMBB:
10828 // lwarx tmpDest, ptr
10829 // add tmp, tmpDest, incr2
10830 // andc tmp2, tmpDest, mask
10831 // and tmp3, tmp, mask
10832 // or tmp4, tmp3, tmp2
10833 // stwcx. tmp4, ptr
10834 // bne- loopMBB
10835 // fallthrough --> exitMBB
10836 // srw dest, tmpDest, shift
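// For example, with an 8-bit operand on big endian, shift1 = (ptr & 3) * 8
// and shift = shift1 ^ 24, so the byte at offset 0 (the most significant
// byte of its word) gets a 24-bit shift; on little endian shift1 is already
// the correct shift amount.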
10837 if (ptrA != ZeroReg) {
10838 Ptr1Reg = RegInfo.createVirtualRegister(RC);
10839 BuildMI(BB, dl, TII->get(is64bit ? PPC::ADD8 : PPC::ADD4), Ptr1Reg)
10840 .addReg(ptrA)
10841 .addReg(ptrB);
10842 } else {
10843 Ptr1Reg = ptrB;
10844 }
10845 // We need to use a 32-bit subregister to avoid a register class mismatch in
10846 // 64-bit mode.
10847 BuildMI(BB, dl, TII->get(PPC::RLWINM), Shift1Reg)
10848 .addReg(Ptr1Reg, 0, is64bit ? PPC::sub_32 : 0)
10849 .addImm(3)
10850 .addImm(27)
10851 .addImm(is8bit ? 28 : 27);
10852 if (!isLittleEndian)
10853 BuildMI(BB, dl, TII->get(PPC::XORI), ShiftReg)
10854 .addReg(Shift1Reg)
10855 .addImm(is8bit ? 24 : 16);
10856 if (is64bit)
10857 BuildMI(BB, dl, TII->get(PPC::RLDICR), PtrReg)
10858 .addReg(Ptr1Reg)
10859 .addImm(0)
10860 .addImm(61);
10861 else
10862 BuildMI(BB, dl, TII->get(PPC::RLWINM), PtrReg)
10863 .addReg(Ptr1Reg)
10864 .addImm(0)
10865 .addImm(0)
10866 .addImm(29);
10867 BuildMI(BB, dl, TII->get(PPC::SLW), Incr2Reg).addReg(incr).addReg(ShiftReg);
10868 if (is8bit)
10869 BuildMI(BB, dl, TII->get(PPC::LI), Mask2Reg).addImm(255);
10870 else {
10871 BuildMI(BB, dl, TII->get(PPC::LI), Mask3Reg).addImm(0);
10872 BuildMI(BB, dl, TII->get(PPC::ORI), Mask2Reg)
10873 .addReg(Mask3Reg)
10874 .addImm(65535);
10875 }
10876 BuildMI(BB, dl, TII->get(PPC::SLW), MaskReg)
10877 .addReg(Mask2Reg)
10878 .addReg(ShiftReg);
10879
10880 BB = loopMBB;
10881 BuildMI(BB, dl, TII->get(PPC::LWARX), TmpDestReg)
10882 .addReg(ZeroReg)
10883 .addReg(PtrReg);
10884 if (BinOpcode)
10885 BuildMI(BB, dl, TII->get(BinOpcode), TmpReg)
10886 .addReg(Incr2Reg)
10887 .addReg(TmpDestReg);
10888 BuildMI(BB, dl, TII->get(PPC::ANDC), Tmp2Reg)
10889 .addReg(TmpDestReg)
10890 .addReg(MaskReg);
10891 BuildMI(BB, dl, TII->get(PPC::AND), Tmp3Reg).addReg(TmpReg).addReg(MaskReg);
10892 if (CmpOpcode) {
10893 // For unsigned comparisons, we can directly compare the shifted values.
10894 // For signed comparisons we shift and sign extend.
10895 Register SReg = RegInfo.createVirtualRegister(GPRC);
10896 BuildMI(BB, dl, TII->get(PPC::AND), SReg)
10897 .addReg(TmpDestReg)
10898 .addReg(MaskReg);
10899 unsigned ValueReg = SReg;
10900 unsigned CmpReg = Incr2Reg;
10901 if (CmpOpcode == PPC::CMPW) {
10902 ValueReg = RegInfo.createVirtualRegister(GPRC);
10903 BuildMI(BB, dl, TII->get(PPC::SRW), ValueReg)
10904 .addReg(SReg)
10905 .addReg(ShiftReg);
10906 Register ValueSReg = RegInfo.createVirtualRegister(GPRC);
10907 BuildMI(BB, dl, TII->get(is8bit ? PPC::EXTSB : PPC::EXTSH), ValueSReg)
10908 .addReg(ValueReg);
10909 ValueReg = ValueSReg;
10910 CmpReg = incr;
10911 }
10912 BuildMI(BB, dl, TII->get(CmpOpcode), PPC::CR0)
10913 .addReg(CmpReg)
10914 .addReg(ValueReg);
10915 BuildMI(BB, dl, TII->get(PPC::BCC))
10916 .addImm(CmpPred)
10917 .addReg(PPC::CR0)
10918 .addMBB(exitMBB);
10919 BB->addSuccessor(loop2MBB);
10920 BB->addSuccessor(exitMBB);
10921 BB = loop2MBB;
10922 }
10923 BuildMI(BB, dl, TII->get(PPC::OR), Tmp4Reg).addReg(Tmp3Reg).addReg(Tmp2Reg);
10924 BuildMI(BB, dl, TII->get(PPC::STWCX))
10925 .addReg(Tmp4Reg)
10926 .addReg(ZeroReg)
10927 .addReg(PtrReg);
10928 BuildMI(BB, dl, TII->get(PPC::BCC))
10929 .addImm(PPC::PRED_NE)
10930 .addReg(PPC::CR0)
10931 .addMBB(loopMBB);
10932 BB->addSuccessor(loopMBB);
10933 BB->addSuccessor(exitMBB);
10934
10935 // exitMBB:
10936 // ...
10937 BB = exitMBB;
10938 BuildMI(*BB, BB->begin(), dl, TII->get(PPC::SRW), dest)
10939 .addReg(TmpDestReg)
10940 .addReg(ShiftReg);
10941 return BB;
10942}
10943
10944 MachineBasicBlock *
10945 PPCTargetLowering::emitEHSjLjSetJmp(MachineInstr &MI,
10946 MachineBasicBlock *MBB) const {
10947 DebugLoc DL = MI.getDebugLoc();
10948 const TargetInstrInfo *TII = Subtarget.getInstrInfo();
10949 const PPCRegisterInfo *TRI = Subtarget.getRegisterInfo();
10950
10951 MachineFunction *MF = MBB->getParent();
10952 MachineRegisterInfo &MRI = MF->getRegInfo();
10953
10954 const BasicBlock *BB = MBB->getBasicBlock();
10955 MachineFunction::iterator I = ++MBB->getIterator();
10956
10957 Register DstReg = MI.getOperand(0).getReg();
10958 const TargetRegisterClass *RC = MRI.getRegClass(DstReg);
10959 assert(TRI->isTypeLegalForClass(*RC, MVT::i32) && "Invalid destination!");
10960 Register mainDstReg = MRI.createVirtualRegister(RC);
10961 Register restoreDstReg = MRI.createVirtualRegister(RC);
10962
10963 MVT PVT = getPointerTy(MF->getDataLayout());
10964 assert((PVT == MVT::i64 || PVT == MVT::i32) &&
10965 "Invalid Pointer Size!");
10966 // For v = setjmp(buf), we generate
10967 //
10968 // thisMBB:
10969 // SjLjSetup mainMBB
10970 // bl mainMBB
10971 // v_restore = 1
10972 // b sinkMBB
10973 //
10974 // mainMBB:
10975 // buf[LabelOffset] = LR
10976 // v_main = 0
10977 //
10978 // sinkMBB:
10979 // v = phi(main, restore)
10980 //
10981
10982 MachineBasicBlock *thisMBB = MBB;
10983 MachineBasicBlock *mainMBB = MF->CreateMachineBasicBlock(BB);
10984 MachineBasicBlock *sinkMBB = MF->CreateMachineBasicBlock(BB);
10985 MF->insert(I, mainMBB);
10986 MF->insert(I, sinkMBB);
10987
10988 MachineInstrBuilder MIB;
10989
10990 // Transfer the remainder of BB and its successor edges to sinkMBB.
10991 sinkMBB->splice(sinkMBB->begin(), MBB,
10992 std::next(MachineBasicBlock::iterator(MI)), MBB->end());
10993 sinkMBB->transferSuccessorsAndUpdatePHIs(MBB);
10994
10995 // Note that the structure of the jmp_buf used here is not compatible
10996 // with that used by libc, and is not designed to be. Specifically, it
10997 // stores only those 'reserved' registers that LLVM does not otherwise
10998 // understand how to spill. Also, by convention, by the time this
10999 // intrinsic is called, Clang has already stored the frame address in the
11000 // first slot of the buffer and stack address in the third. Following the
11001 // X86 target code, we'll store the jump address in the second slot. We also
11002 // need to save the TOC pointer (R2) to handle jumps between shared
11003 // libraries, and that will be stored in the fourth slot. The thread
11004 // identifier (R13) is not affected.
11005
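// Buffer layout, in pointer-sized slots: [0] frame address (stored by
// Clang), [1] resume IP (LabelOffset), [2] stack address (stored by Clang),
// [3] TOC pointer (TOCOffset), [4] base pointer (BPOffset).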
11006 // thisMBB:
11007 const int64_t LabelOffset = 1 * PVT.getStoreSize();
11008 const int64_t TOCOffset = 3 * PVT.getStoreSize();
11009 const int64_t BPOffset = 4 * PVT.getStoreSize();
11010
11011 // Prepare the resume IP in a register.
11012 const TargetRegisterClass *PtrRC = getRegClassFor(PVT);
11013 Register LabelReg = MRI.createVirtualRegister(PtrRC);
11014 Register BufReg = MI.getOperand(1).getReg();
11015
11016 if (Subtarget.is64BitELFABI()) {
11017 setUsesTOCBasePtr(*MBB->getParent());
11018 MIB = BuildMI(*thisMBB, MI, DL, TII->get(PPC::STD))
11019 .addReg(PPC::X2)
11020 .addImm(TOCOffset)
11021 .addReg(BufReg)
11022 .cloneMemRefs(MI);
11023 }
11024
11025 // Naked functions never have a base pointer, and so we use r1. For all
11026 // other functions, this decision must be delayed until during PEI.
11027 unsigned BaseReg;
11028 if (MF->getFunction().hasFnAttribute(Attribute::Naked))
11029 BaseReg = Subtarget.isPPC64() ? PPC::X1 : PPC::R1;
11030 else
11031 BaseReg = Subtarget.isPPC64() ? PPC::BP8 : PPC::BP;
11032
11033 MIB = BuildMI(*thisMBB, MI, DL,
11034 TII->get(Subtarget.isPPC64() ? PPC::STD : PPC::STW))
11035 .addReg(BaseReg)
11036 .addImm(BPOffset)
11037 .addReg(BufReg)
11038 .cloneMemRefs(MI);
11039
11040 // Setup
11041 MIB = BuildMI(*thisMBB, MI, DL, TII->get(PPC::BCLalways)).addMBB(mainMBB);
11042 MIB.addRegMask(TRI->getNoPreservedMask());
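// The BCL leaves the address of the next instruction (the v_restore path
// below) in LR; mainMBB saves that address into the buffer, so a longjmp
// resumes at v_restore = 1.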
11043
11044 BuildMI(*thisMBB, MI, DL, TII->get(PPC::LI), restoreDstReg).addImm(1);
11045
11046 MIB = BuildMI(*thisMBB, MI, DL, TII->get(PPC::EH_SjLj_Setup))
11047 .addMBB(mainMBB);
11048 MIB = BuildMI(*thisMBB, MI, DL, TII->get(PPC::B)).addMBB(sinkMBB);
11049
11050 thisMBB->addSuccessor(mainMBB, BranchProbability::getZero());
11051 thisMBB->addSuccessor(sinkMBB, BranchProbability::getOne());
11052
11053 // mainMBB:
11054 // mainDstReg = 0
11055 MIB =
11056 BuildMI(mainMBB, DL,
11057 TII->get(Subtarget.isPPC64() ? PPC::MFLR8 : PPC::MFLR), LabelReg);
11058
11059 // Store IP
11060 if (Subtarget.isPPC64()) {
11061 MIB = BuildMI(mainMBB, DL, TII->get(PPC::STD))
11062 .addReg(LabelReg)
11063 .addImm(LabelOffset)
11064 .addReg(BufReg);
11065 } else {
11066 MIB = BuildMI(mainMBB, DL, TII->get(PPC::STW))
11067 .addReg(LabelReg)
11068 .addImm(LabelOffset)
11069 .addReg(BufReg);
11070 }
11071 MIB.cloneMemRefs(MI);
11072
11073 BuildMI(mainMBB, DL, TII->get(PPC::LI), mainDstReg).addImm(0);
11074 mainMBB->addSuccessor(sinkMBB);
11075
11076 // sinkMBB:
11077 BuildMI(*sinkMBB, sinkMBB->begin(), DL,
11078 TII->get(PPC::PHI), DstReg)
11079 .addReg(mainDstReg).addMBB(mainMBB)
11080 .addReg(restoreDstReg).addMBB(thisMBB);
11081
11082 MI.eraseFromParent();
11083 return sinkMBB;
11084}
11085
11086 MachineBasicBlock *
11087 PPCTargetLowering::emitEHSjLjLongJmp(MachineInstr &MI,
11088 MachineBasicBlock *MBB) const {
11089 DebugLoc DL = MI.getDebugLoc();
11090 const TargetInstrInfo *TII = Subtarget.getInstrInfo();
11091
11092 MachineFunction *MF = MBB->getParent();
11093 MachineRegisterInfo &MRI = MF->getRegInfo();
11094
11095 MVT PVT = getPointerTy(MF->getDataLayout());
11096 assert((PVT == MVT::i64 || PVT == MVT::i32) &&
11097 "Invalid Pointer Size!");
11098
11099 const TargetRegisterClass *RC =
11100 (PVT == MVT::i64) ? &PPC::G8RCRegClass : &PPC::GPRCRegClass;
11101 Register Tmp = MRI.createVirtualRegister(RC);
11102 // Since FP is only updated here but NOT referenced, it's treated as GPR.
11103 unsigned FP = (PVT == MVT::i64) ? PPC::X31 : PPC::R31;
11104 unsigned SP = (PVT == MVT::i64) ? PPC::X1 : PPC::R1;
11105 unsigned BP =
11106 (PVT == MVT::i64)
11107 ? PPC::X30
11108 : (Subtarget.isSVR4ABI() && isPositionIndependent() ? PPC::R29
11109 : PPC::R30);
11110
11110
11111 MachineInstrBuilder MIB;
11112
11113 const int64_t LabelOffset = 1 * PVT.getStoreSize();
11114 const int64_t SPOffset = 2 * PVT.getStoreSize();
11115 const int64_t TOCOffset = 3 * PVT.getStoreSize();
11116 const int64_t BPOffset = 4 * PVT.getStoreSize();
11117
11118 Register BufReg = MI.getOperand(0).getReg();
11119
11120 // Reload FP (the jumped-to function may not have had a
11121 // frame pointer, and if so, then its r31 will be restored
11122 // as necessary).
11123 if (PVT == MVT::i64) {
11124 MIB = BuildMI(*MBB, MI, DL, TII->get(PPC::LD), FP)
11125 .addImm(0)
11126 .addReg(BufReg);
11127 } else {
11128 MIB = BuildMI(*MBB, MI, DL, TII->get(PPC::LWZ), FP)
11129 .addImm(0)
11130 .addReg(BufReg);
11131 }
11132 MIB.cloneMemRefs(MI);
11133
11134 // Reload IP
11135 if (PVT == MVT::i64) {
11136 MIB = BuildMI(*MBB, MI, DL, TII->get(PPC::LD), Tmp)
11137 .addImm(LabelOffset)
11138 .addReg(BufReg);
11139 } else {
11140 MIB = BuildMI(*MBB, MI, DL, TII->get(PPC::LWZ), Tmp)
11141 .addImm(LabelOffset)
11142 .addReg(BufReg);
11143 }
11144 MIB.cloneMemRefs(MI);
11145
11146 // Reload SP
11147 if (PVT == MVT::i64) {
11148 MIB = BuildMI(*MBB, MI, DL, TII->get(PPC::LD), SP)
11149 .addImm(SPOffset)
11150 .addReg(BufReg);
11151 } else {
11152 MIB = BuildMI(*MBB, MI, DL, TII->get(PPC::LWZ), SP)
11153 .addImm(SPOffset)
11154 .addReg(BufReg);
11155 }
11156 MIB.cloneMemRefs(MI);
11157
11158 // Reload BP
11159 if (PVT == MVT::i64) {
11160 MIB = BuildMI(*MBB, MI, DL, TII->get(PPC::LD), BP)
11161 .addImm(BPOffset)
11162 .addReg(BufReg);
11163 } else {
11164 MIB = BuildMI(*MBB, MI, DL, TII->get(PPC::LWZ), BP)
11165 .addImm(BPOffset)
11166 .addReg(BufReg);
11167 }
11168 MIB.cloneMemRefs(MI);
11169
11170 // Reload TOC
11171 if (PVT == MVT::i64 && Subtarget.isSVR4ABI()) {
11172 setUsesTOCBasePtr(*MBB->getParent());
11173 MIB = BuildMI(*MBB, MI, DL, TII->get(PPC::LD), PPC::X2)
11174 .addImm(TOCOffset)
11175 .addReg(BufReg)
11176 .cloneMemRefs(MI);
11177 }
11178
11179 // Jump
11180 BuildMI(*MBB, MI, DL,
11181 TII->get(PVT == MVT::i64 ? PPC::MTCTR8 : PPC::MTCTR)).addReg(Tmp);
11182 BuildMI(*MBB, MI, DL, TII->get(PVT == MVT::i64 ? PPC::BCTR8 : PPC::BCTR));
11183
11184 MI.eraseFromParent();
11185 return MBB;
11186}
11187
11188 bool PPCTargetLowering::hasInlineStackProbe(MachineFunction &MF) const {
11189 // If the function specifically requests inline stack probes, emit them.
11190 if (MF.getFunction().hasFnAttribute("probe-stack"))
11191 return MF.getFunction().getFnAttribute("probe-stack").getValueAsString() ==
11192 "inline-asm";
11193 return false;
11194}
11195
11196 unsigned PPCTargetLowering::getStackProbeSize(MachineFunction &MF) const {
11197 const TargetFrameLowering *TFI = Subtarget.getFrameLowering();
11198 unsigned StackAlign = TFI->getStackAlignment();
11199 assert(StackAlign >= 1 && isPowerOf2_32(StackAlign) &&
11200 "Unexpected stack alignment");
11201 // The default stack probe size is 4096 if the function has no
11202 // stack-probe-size attribute.
11203 unsigned StackProbeSize = 4096;
11204 const Function &Fn = MF.getFunction();
11205 if (Fn.hasFnAttribute("stack-probe-size"))
11206 Fn.getFnAttribute("stack-probe-size")
11207 .getValueAsString()
11208 .getAsInteger(0, StackProbeSize);
11209 // Round down to the stack alignment.
11210 StackProbeSize &= ~(StackAlign - 1);
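// Since StackAlign is a power of two, the masking rounds down; e.g. with
// StackAlign == 16, a requested probe size of 4100 becomes 4096.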
11211 return StackProbeSize ? StackProbeSize : StackAlign;
11212}
11213
11214 // Lower dynamic stack allocation with probing. `emitProbedAlloca` is split
11215 // into three phases. In the first phase, it uses the pseudo instruction
11216 // PREPARE_PROBED_ALLOCA to get the future result of the actual FramePointer and
11217 // FinalStackPtr. In the second phase, it generates a loop for probing blocks.
11218 // At last, it uses the pseudo instruction DYNAREAOFFSET to get the future result
11219 // of MaxCallFrameSize so that it can calculate the correct data area pointer.
11220 MachineBasicBlock *
11221 PPCTargetLowering::emitProbedAlloca(MachineInstr &MI,
11222 MachineBasicBlock *MBB) const {
11223 const bool isPPC64 = Subtarget.isPPC64();
11224 MachineFunction *MF = MBB->getParent();
11225 const TargetInstrInfo *TII = Subtarget.getInstrInfo();
11226 DebugLoc DL = MI.getDebugLoc();
11227 const unsigned ProbeSize = getStackProbeSize(*MF);
11228 const BasicBlock *ProbedBB = MBB->getBasicBlock();
11229 MachineRegisterInfo &MRI = MF->getRegInfo();
11230 // The CFG of the stack-probing loop looks like:
11231 // +-----+
11232 // | MBB |
11233 // +--+--+
11234 // |
11235 // +----v----+
11236 // +--->+ TestMBB +---+
11237 // | +----+----+ |
11238 // | | |
11239 // | +-----v----+ |
11240 // +---+ BlockMBB | |
11241 // +----------+ |
11242 // |
11243 // +---------+ |
11244 // | TailMBB +<--+
11245 // +---------+
11246 // In MBB, calculate previous frame pointer and final stack pointer.
11247 // In TestMBB, test if sp is equal to final stack pointer, if so, jump to
11248 // TailMBB. In BlockMBB, update the sp atomically and jump back to TestMBB.
11249 // TailMBB is spliced via \p MI.
11250 MachineBasicBlock *TestMBB = MF->CreateMachineBasicBlock(ProbedBB);
11251 MachineBasicBlock *TailMBB = MF->CreateMachineBasicBlock(ProbedBB);
11252 MachineBasicBlock *BlockMBB = MF->CreateMachineBasicBlock(ProbedBB);
11253
11254 MachineFunction::iterator MBBIter = ++MBB->getIterator();
11255 MF->insert(MBBIter, TestMBB);
11256 MF->insert(MBBIter, BlockMBB);
11257 MF->insert(MBBIter, TailMBB);
11258
11259 const TargetRegisterClass *G8RC = &PPC::G8RCRegClass;
11260 const TargetRegisterClass *GPRC = &PPC::GPRCRegClass;
11261
11262 Register DstReg = MI.getOperand(0).getReg();
11263 Register NegSizeReg = MI.getOperand(1).getReg();
11264 Register SPReg = isPPC64 ? PPC::X1 : PPC::R1;
11265 Register FinalStackPtr = MRI.createVirtualRegister(isPPC64 ? G8RC : GPRC);
11266 Register FramePointer = MRI.createVirtualRegister(isPPC64 ? G8RC : GPRC);
11267 Register ActualNegSizeReg = MRI.createVirtualRegister(isPPC64 ? G8RC : GPRC);
11268
11269 // Since the value of NegSizeReg might be realigned during prologue/epilogue
11270 // insertion, insert a PREPARE_PROBED_ALLOCA pseudo instruction to get the
11271 // actual FramePointer and NegSize.
11272 unsigned ProbeOpc;
11273 if (!MRI.hasOneNonDBGUse(NegSizeReg))
11274 ProbeOpc =
11275 isPPC64 ? PPC::PREPARE_PROBED_ALLOCA_64 : PPC::PREPARE_PROBED_ALLOCA_32;
11276 else
11277 // By introducing PREPARE_PROBED_ALLOCA_NEGSIZE_SAME_REG, ActualNegSizeReg
11278 // and NegSizeReg will be allocated to the same physical register, avoiding a
11279 // redundant copy when NegSizeReg has only one use, namely the current MI,
11280 // which PREPARE_PROBED_ALLOCA will then replace.
11281 ProbeOpc = isPPC64 ? PPC::PREPARE_PROBED_ALLOCA_NEGSIZE_SAME_REG_64
11282 : PPC::PREPARE_PROBED_ALLOCA_NEGSIZE_SAME_REG_32;
11283 BuildMI(*MBB, {MI}, DL, TII->get(ProbeOpc), FramePointer)
11284 .addDef(ActualNegSizeReg)
11285 .addReg(NegSizeReg)
11286 .add(MI.getOperand(2))
11287 .add(MI.getOperand(3));
11288
11289 // Calculate final stack pointer, which equals to SP + ActualNegSize.
11290 BuildMI(*MBB, {MI}, DL, TII->get(isPPC64 ? PPC::ADD8 : PPC::ADD4),
11291 FinalStackPtr)
11292 .addReg(SPReg)
11293 .addReg(ActualNegSizeReg);
11294
11295 // Materialize a scratch register for update.
11296 int64_t NegProbeSize = -(int64_t)ProbeSize;
11297 assert(isInt<32>(NegProbeSize) && "Unhandled probe size!");
11298 Register ScratchReg = MRI.createVirtualRegister(isPPC64 ? G8RC : GPRC);
11299 if (!isInt<16>(NegProbeSize)) {
11300 Register TempReg = MRI.createVirtualRegister(isPPC64 ? G8RC : GPRC);
11301 BuildMI(*MBB, {MI}, DL, TII->get(isPPC64 ? PPC::LIS8 : PPC::LIS), TempReg)
11302 .addImm(NegProbeSize >> 16);
11303 BuildMI(*MBB, {MI}, DL, TII->get(isPPC64 ? PPC::ORI8 : PPC::ORI),
11304 ScratchReg)
11305 .addReg(TempReg)
11306 .addImm(NegProbeSize & 0xFFFF);
11307 } else
11308 BuildMI(*MBB, {MI}, DL, TII->get(isPPC64 ? PPC::LI8 : PPC::LI), ScratchReg)
11309 .addImm(NegProbeSize);
11310
11311 {
11312 // Probing leading residual part.
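// Both ActualNegSize and the scratch NegProbeSize are negative, so Div is
// their positive quotient, Mul the nearest multiple of NegProbeSize toward
// zero, and NegMod = ActualNegSize - Mul the non-positive remainder; the
// store-with-update below bumps SP by that remainder to touch the first,
// partial-sized block.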
11313 Register Div = MRI.createVirtualRegister(isPPC64 ? G8RC : GPRC);
11314 BuildMI(*MBB, {MI}, DL, TII->get(isPPC64 ? PPC::DIVD : PPC::DIVW), Div)
11315 .addReg(ActualNegSizeReg)
11316 .addReg(ScratchReg);
11317 Register Mul = MRI.createVirtualRegister(isPPC64 ? G8RC : GPRC);
11318 BuildMI(*MBB, {MI}, DL, TII->get(isPPC64 ? PPC::MULLD : PPC::MULLW), Mul)
11319 .addReg(Div)
11320 .addReg(ScratchReg);
11321 Register NegMod = MRI.createVirtualRegister(isPPC64 ? G8RC : GPRC);
11322 BuildMI(*MBB, {MI}, DL, TII->get(isPPC64 ? PPC::SUBF8 : PPC::SUBF), NegMod)
11323 .addReg(Mul)
11324 .addReg(ActualNegSizeReg);
11325 BuildMI(*MBB, {MI}, DL, TII->get(isPPC64 ? PPC::STDUX : PPC::STWUX), SPReg)
11326 .addReg(FramePointer)
11327 .addReg(SPReg)
11328 .addReg(NegMod);
11329 }
11330
11331 {
11332 // Remaining part should be multiple of ProbeSize.
11333 Register CmpResult = MRI.createVirtualRegister(&PPC::CRRCRegClass);
11334 BuildMI(TestMBB, DL, TII->get(isPPC64 ? PPC::CMPD : PPC::CMPW), CmpResult)
11335 .addReg(SPReg)
11336 .addReg(FinalStackPtr);
11337 BuildMI(TestMBB, DL, TII->get(PPC::BCC))
11338 .addImm(PPC::PRED_EQ)
11339 .addReg(CmpResult)
11340 .addMBB(TailMBB);
11341 TestMBB->addSuccessor(BlockMBB);
11342 TestMBB->addSuccessor(TailMBB);
11343 }
11344
11345 {
11346 // Touch the block.
11347 // |P...|P...|P...
11348 BuildMI(BlockMBB, DL, TII->get(isPPC64 ? PPC::STDUX : PPC::STWUX), SPReg)
11349 .addReg(FramePointer)
11350 .addReg(SPReg)
11351 .addReg(ScratchReg);
11352 BuildMI(BlockMBB, DL, TII->get(PPC::B)).addMBB(TestMBB);
11353 BlockMBB->addSuccessor(TestMBB);
11354 }
11355
11356 // Calculation of MaxCallFrameSize is deferred to prologue/epilogue insertion,
11357 // so use the DYNAREAOFFSET pseudo instruction to get the future result.
11358 Register MaxCallFrameSizeReg =
11359 MRI.createVirtualRegister(isPPC64 ? G8RC : GPRC);
11360 BuildMI(TailMBB, DL,
11361 TII->get(isPPC64 ? PPC::DYNAREAOFFSET8 : PPC::DYNAREAOFFSET),
11362 MaxCallFrameSizeReg)
11363 .add(MI.getOperand(2))
11364 .add(MI.getOperand(3));
11365 BuildMI(TailMBB, DL, TII->get(isPPC64 ? PPC::ADD8 : PPC::ADD4), DstReg)
11366 .addReg(SPReg)
11367 .addReg(MaxCallFrameSizeReg);
11368
11369 // Splice instructions after MI to TailMBB.
11370 TailMBB->splice(TailMBB->end(), MBB,
11371 std::next(MachineBasicBlock::iterator(MI)), MBB->end());
11372 TailMBB->transferSuccessorsAndUpdatePHIs(MBB);
11373 MBB->addSuccessor(TestMBB);
11374
11375 // Delete the pseudo instruction.
11376 MI.eraseFromParent();
11377
11378 ++NumDynamicAllocaProbed;
11379 return TailMBB;
11380}
11381
11382 MachineBasicBlock *
11383 PPCTargetLowering::EmitInstrWithCustomInserter(MachineInstr &MI,
11384 MachineBasicBlock *BB) const {
11385 if (MI.getOpcode() == TargetOpcode::STACKMAP ||
11386 MI.getOpcode() == TargetOpcode::PATCHPOINT) {
11387 if (Subtarget.is64BitELFABI() &&
11388 MI.getOpcode() == TargetOpcode::PATCHPOINT &&
11389 !Subtarget.isUsingPCRelativeCalls()) {
11390 // Call lowering should have added an r2 operand to indicate a dependence
11391 // on the TOC base pointer value. It can't however, because there is no
11392 // way to mark the dependence as implicit there, and so the stackmap code
11393 // will confuse it with a regular operand. Instead, add the dependence
11394 // here.
11395 MI.addOperand(MachineOperand::CreateReg(PPC::X2, false, true));
11396 }
11397
11398 return emitPatchPoint(MI, BB);
11399 }
11400
11401 if (MI.getOpcode() == PPC::EH_SjLj_SetJmp32 ||
11402 MI.getOpcode() == PPC::EH_SjLj_SetJmp64) {
11403 return emitEHSjLjSetJmp(MI, BB);
11404 } else if (MI.getOpcode() == PPC::EH_SjLj_LongJmp32 ||
11405 MI.getOpcode() == PPC::EH_SjLj_LongJmp64) {
11406 return emitEHSjLjLongJmp(MI, BB);
11407 }
11408
11409 const TargetInstrInfo *TII = Subtarget.getInstrInfo();
11410
11411 // To "insert" these instructions we actually have to insert their
11412 // control-flow patterns.
11413 const BasicBlock *LLVM_BB = BB->getBasicBlock();
11414 MachineFunction::iterator It = ++BB->getIterator();
11415
11416 MachineFunction *F = BB->getParent();
11417
11418 if (MI.getOpcode() == PPC::SELECT_CC_I4 ||
11419 MI.getOpcode() == PPC::SELECT_CC_I8 || MI.getOpcode() == PPC::SELECT_I4 ||
11420 MI.getOpcode() == PPC::SELECT_I8) {
11421 SmallVector<MachineOperand, 2> Cond;
11422 if (MI.getOpcode() == PPC::SELECT_CC_I4 ||
11423 MI.getOpcode() == PPC::SELECT_CC_I8)
11424 Cond.push_back(MI.getOperand(4));
11425 else
11426 Cond.push_back(MachineOperand::CreateImm(PPC::PRED_BIT_SET));
11427 Cond.push_back(MI.getOperand(1));
11428
11429 DebugLoc dl = MI.getDebugLoc();
11430 TII->insertSelect(*BB, MI, dl, MI.getOperand(0).getReg(), Cond,
11431 MI.getOperand(2).getReg(), MI.getOperand(3).getReg());
11432 } else if (MI.getOpcode() == PPC::SELECT_CC_F4 ||
11433 MI.getOpcode() == PPC::SELECT_CC_F8 ||
11434 MI.getOpcode() == PPC::SELECT_CC_F16 ||
11435 MI.getOpcode() == PPC::SELECT_CC_VRRC ||
11436 MI.getOpcode() == PPC::SELECT_CC_VSFRC ||
11437 MI.getOpcode() == PPC::SELECT_CC_VSSRC ||
11438 MI.getOpcode() == PPC::SELECT_CC_VSRC ||
11439 MI.getOpcode() == PPC::SELECT_CC_SPE4 ||
11440 MI.getOpcode() == PPC::SELECT_CC_SPE ||
11441 MI.getOpcode() == PPC::SELECT_F4 ||
11442 MI.getOpcode() == PPC::SELECT_F8 ||
11443 MI.getOpcode() == PPC::SELECT_F16 ||
11444 MI.getOpcode() == PPC::SELECT_SPE ||
11445 MI.getOpcode() == PPC::SELECT_SPE4 ||
11446 MI.getOpcode() == PPC::SELECT_VRRC ||
11447 MI.getOpcode() == PPC::SELECT_VSFRC ||
11448 MI.getOpcode() == PPC::SELECT_VSSRC ||
11449 MI.getOpcode() == PPC::SELECT_VSRC) {
11450 // The incoming instruction knows the destination vreg to set, the
11451 // condition code register to branch on, the true/false values to
11452 // select between, and a branch opcode to use.
11453
11454 // thisMBB:
11455 // ...
11456 // TrueVal = ...
11457 // cmpTY ccX, r1, r2
11458 // bCC copy1MBB
11459 // fallthrough --> copy0MBB
11460 MachineBasicBlock *thisMBB = BB;
11461 MachineBasicBlock *copy0MBB = F->CreateMachineBasicBlock(LLVM_BB);
11462 MachineBasicBlock *sinkMBB = F->CreateMachineBasicBlock(LLVM_BB);
11463 DebugLoc dl = MI.getDebugLoc();
11464 F->insert(It, copy0MBB);
11465 F->insert(It, sinkMBB);
11466
11467 // Transfer the remainder of BB and its successor edges to sinkMBB.
11468 sinkMBB->splice(sinkMBB->begin(), BB,
11469 std::next(MachineBasicBlock::iterator(MI)), BB->end());
11470 sinkMBB->transferSuccessorsAndUpdatePHIs(BB);
11471
11472 // Next, add the true and fallthrough blocks as its successors.
11473 BB->addSuccessor(copy0MBB);
11474 BB->addSuccessor(sinkMBB);
11475
11476 if (MI.getOpcode() == PPC::SELECT_I4 || MI.getOpcode() == PPC::SELECT_I8 ||
11477 MI.getOpcode() == PPC::SELECT_F4 || MI.getOpcode() == PPC::SELECT_F8 ||
11478 MI.getOpcode() == PPC::SELECT_F16 ||
11479 MI.getOpcode() == PPC::SELECT_SPE4 ||
11480 MI.getOpcode() == PPC::SELECT_SPE ||
11481 MI.getOpcode() == PPC::SELECT_VRRC ||
11482 MI.getOpcode() == PPC::SELECT_VSFRC ||
11483 MI.getOpcode() == PPC::SELECT_VSSRC ||
11484 MI.getOpcode() == PPC::SELECT_VSRC) {
11485 BuildMI(BB, dl, TII->get(PPC::BC))
11486 .addReg(MI.getOperand(1).getReg())
11487 .addMBB(sinkMBB);
11488 } else {
11489 unsigned SelectPred = MI.getOperand(4).getImm();
11490 BuildMI(BB, dl, TII->get(PPC::BCC))
11491 .addImm(SelectPred)
11492 .addReg(MI.getOperand(1).getReg())
11493 .addMBB(sinkMBB);
11494 }
11495
11496 // copy0MBB:
11497 // %FalseValue = ...
11498 // # fallthrough to sinkMBB
11499 BB = copy0MBB;
11500
11501 // Update machine-CFG edges
11502 BB->addSuccessor(sinkMBB);
11503
11504 // sinkMBB:
11505 // %Result = phi [ %FalseValue, copy0MBB ], [ %TrueValue, thisMBB ]
11506 // ...
11507 BB = sinkMBB;
11508 BuildMI(*BB, BB->begin(), dl, TII->get(PPC::PHI), MI.getOperand(0).getReg())
11509 .addReg(MI.getOperand(3).getReg())
11510 .addMBB(copy0MBB)
11511 .addReg(MI.getOperand(2).getReg())
11512 .addMBB(thisMBB);
11513 } else if (MI.getOpcode() == PPC::ReadTB) {
11514 // To read the 64-bit time-base register on a 32-bit target, we read the
11515 // two halves. Should the counter have wrapped while it was being read, we
11516 // need to try again.
11517 // ...
11518 // readLoop:
11519 // mfspr Rx,TBU # load from TBU
11520 // mfspr Ry,TB # load from TB
11521 // mfspr Rz,TBU # load from TBU
11522 // cmpw crX,Rx,Rz # check if 'old'='new'
11523 // bne readLoop # branch if they're not equal
11524 // ...
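// An equivalent C-style sketch of the retry protocol (illustrative only;
// TBU and TB stand for the mfspr reads above): if the low half wraps
// between the two TBU reads, the halves disagree and the loop retries, so
// the returned pair is always consistent.
//   do {
//     hi = TBU; // mfspr Rx,TBU
//     lo = TB;  // mfspr Ry,TB
//   } while (hi != TBU); // mfspr Rz,TBU; cmpw; bne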
11525
11526 MachineBasicBlock *readMBB = F->CreateMachineBasicBlock(LLVM_BB);
11527 MachineBasicBlock *sinkMBB = F->CreateMachineBasicBlock(LLVM_BB);
11528 DebugLoc dl = MI.getDebugLoc();
11529 F->insert(It, readMBB);
11530 F->insert(It, sinkMBB);
11531
11532 // Transfer the remainder of BB and its successor edges to sinkMBB.
11533 sinkMBB->splice(sinkMBB->begin(), BB,
11534 std::next(MachineBasicBlock::iterator(MI)), BB->end());
11535 sinkMBB->transferSuccessorsAndUpdatePHIs(BB);
11536
11537 BB->addSuccessor(readMBB);
11538 BB = readMBB;
11539
11540 MachineRegisterInfo &RegInfo = F->getRegInfo();
11541 Register ReadAgainReg = RegInfo.createVirtualRegister(&PPC::GPRCRegClass);
11542 Register LoReg = MI.getOperand(0).getReg();
11543 Register HiReg = MI.getOperand(1).getReg();
11544
11545 BuildMI(BB, dl, TII->get(PPC::MFSPR), HiReg).addImm(269);
11546 BuildMI(BB, dl, TII->get(PPC::MFSPR), LoReg).addImm(268);
11547 BuildMI(BB, dl, TII->get(PPC::MFSPR), ReadAgainReg).addImm(269);
11548
11549 Register CmpReg = RegInfo.createVirtualRegister(&PPC::CRRCRegClass);
11550
11551 BuildMI(BB, dl, TII->get(PPC::CMPW), CmpReg)
11552 .addReg(HiReg)
11553 .addReg(ReadAgainReg);
11554 BuildMI(BB, dl, TII->get(PPC::BCC))
11555 .addImm(PPC::PRED_NE)
11556 .addReg(CmpReg)
11557 .addMBB(readMBB);
11558
11559 BB->addSuccessor(readMBB);
11560 BB->addSuccessor(sinkMBB);
11561 } else if (MI.getOpcode() == PPC::ATOMIC_LOAD_ADD_I8)
11562 BB = EmitPartwordAtomicBinary(MI, BB, true, PPC::ADD4);
11563 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_ADD_I16)
11564 BB = EmitPartwordAtomicBinary(MI, BB, false, PPC::ADD4);
11565 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_ADD_I32)
11566 BB = EmitAtomicBinary(MI, BB, 4, PPC::ADD4);
11567 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_ADD_I64)
11568 BB = EmitAtomicBinary(MI, BB, 8, PPC::ADD8);
11569
11570 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_AND_I8)
11571 BB = EmitPartwordAtomicBinary(MI, BB, true, PPC::AND);
11572 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_AND_I16)
11573 BB = EmitPartwordAtomicBinary(MI, BB, false, PPC::AND);
11574 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_AND_I32)
11575 BB = EmitAtomicBinary(MI, BB, 4, PPC::AND);
11576 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_AND_I64)
11577 BB = EmitAtomicBinary(MI, BB, 8, PPC::AND8);
11578
11579 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_OR_I8)
11580 BB = EmitPartwordAtomicBinary(MI, BB, true, PPC::OR);
11581 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_OR_I16)
11582 BB = EmitPartwordAtomicBinary(MI, BB, false, PPC::OR);
11583 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_OR_I32)
11584 BB = EmitAtomicBinary(MI, BB, 4, PPC::OR);
11585 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_OR_I64)
11586 BB = EmitAtomicBinary(MI, BB, 8, PPC::OR8);
11587
11588 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_XOR_I8)
11589 BB = EmitPartwordAtomicBinary(MI, BB, true, PPC::XOR);
11590 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_XOR_I16)
11591 BB = EmitPartwordAtomicBinary(MI, BB, false, PPC::XOR);
11592 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_XOR_I32)
11593 BB = EmitAtomicBinary(MI, BB, 4, PPC::XOR);
11594 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_XOR_I64)
11595 BB = EmitAtomicBinary(MI, BB, 8, PPC::XOR8);
11596
11597 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_NAND_I8)
11598 BB = EmitPartwordAtomicBinary(MI, BB, true, PPC::NAND);
11599 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_NAND_I16)
11600 BB = EmitPartwordAtomicBinary(MI, BB, false, PPC::NAND);
11601 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_NAND_I32)
11602 BB = EmitAtomicBinary(MI, BB, 4, PPC::NAND);
11603 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_NAND_I64)
11604 BB = EmitAtomicBinary(MI, BB, 8, PPC::NAND8);
11605
11606 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_SUB_I8)
11607 BB = EmitPartwordAtomicBinary(MI, BB, true, PPC::SUBF);
11608 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_SUB_I16)
11609 BB = EmitPartwordAtomicBinary(MI, BB, false, PPC::SUBF);
11610 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_SUB_I32)
11611 BB = EmitAtomicBinary(MI, BB, 4, PPC::SUBF);
11612 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_SUB_I64)
11613 BB = EmitAtomicBinary(MI, BB, 8, PPC::SUBF8);
11614
11615 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_MIN_I8)
11616 BB = EmitPartwordAtomicBinary(MI, BB, true, 0, PPC::CMPW, PPC::PRED_GE);
11617 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_MIN_I16)
11618 BB = EmitPartwordAtomicBinary(MI, BB, false, 0, PPC::CMPW, PPC::PRED_GE);
11619 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_MIN_I32)
11620 BB = EmitAtomicBinary(MI, BB, 4, 0, PPC::CMPW, PPC::PRED_GE);
11621 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_MIN_I64)
11622 BB = EmitAtomicBinary(MI, BB, 8, 0, PPC::CMPD, PPC::PRED_GE);
11623
11624 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_MAX_I8)
11625 BB = EmitPartwordAtomicBinary(MI, BB, true, 0, PPC::CMPW, PPC::PRED_LE);
11626 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_MAX_I16)
11627 BB = EmitPartwordAtomicBinary(MI, BB, false, 0, PPC::CMPW, PPC::PRED_LE);
11628 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_MAX_I32)
11629 BB = EmitAtomicBinary(MI, BB, 4, 0, PPC::CMPW, PPC::PRED_LE);
11630 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_MAX_I64)
11631 BB = EmitAtomicBinary(MI, BB, 8, 0, PPC::CMPD, PPC::PRED_LE);
11632
11633 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_UMIN_I8)
11634 BB = EmitPartwordAtomicBinary(MI, BB, true, 0, PPC::CMPLW, PPC::PRED_GE);
11635 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_UMIN_I16)
11636 BB = EmitPartwordAtomicBinary(MI, BB, false, 0, PPC::CMPLW, PPC::PRED_GE);
11637 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_UMIN_I32)
11638 BB = EmitAtomicBinary(MI, BB, 4, 0, PPC::CMPLW, PPC::PRED_GE);
11639 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_UMIN_I64)
11640 BB = EmitAtomicBinary(MI, BB, 8, 0, PPC::CMPLD, PPC::PRED_GE);
11641
11642 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_UMAX_I8)
11643 BB = EmitPartwordAtomicBinary(MI, BB, true, 0, PPC::CMPLW, PPC::PRED_LE);
11644 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_UMAX_I16)
11645 BB = EmitPartwordAtomicBinary(MI, BB, false, 0, PPC::CMPLW, PPC::PRED_LE);
11646 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_UMAX_I32)
11647 BB = EmitAtomicBinary(MI, BB, 4, 0, PPC::CMPLW, PPC::PRED_LE);
11648 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_UMAX_I64)
11649 BB = EmitAtomicBinary(MI, BB, 8, 0, PPC::CMPLD, PPC::PRED_LE);
11650
11651 else if (MI.getOpcode() == PPC::ATOMIC_SWAP_I8)
11652 BB = EmitPartwordAtomicBinary(MI, BB, true, 0);
11653 else if (MI.getOpcode() == PPC::ATOMIC_SWAP_I16)
11654 BB = EmitPartwordAtomicBinary(MI, BB, false, 0);
11655 else if (MI.getOpcode() == PPC::ATOMIC_SWAP_I32)
11656 BB = EmitAtomicBinary(MI, BB, 4, 0);
11657 else if (MI.getOpcode() == PPC::ATOMIC_SWAP_I64)
11658 BB = EmitAtomicBinary(MI, BB, 8, 0);
11659 else if (MI.getOpcode() == PPC::ATOMIC_CMP_SWAP_I32 ||
11660 MI.getOpcode() == PPC::ATOMIC_CMP_SWAP_I64 ||
11661 (Subtarget.hasPartwordAtomics() &&
11662 MI.getOpcode() == PPC::ATOMIC_CMP_SWAP_I8) ||
11663 (Subtarget.hasPartwordAtomics() &&
11664 MI.getOpcode() == PPC::ATOMIC_CMP_SWAP_I16)) {
11665 bool is64bit = MI.getOpcode() == PPC::ATOMIC_CMP_SWAP_I64;
11666
11667 auto LoadMnemonic = PPC::LDARX;
11668 auto StoreMnemonic = PPC::STDCX;
11669 switch (MI.getOpcode()) {
11670 default:
11671 llvm_unreachable("Compare and swap of unknown size");
11672 case PPC::ATOMIC_CMP_SWAP_I8:
11673 LoadMnemonic = PPC::LBARX;
11674 StoreMnemonic = PPC::STBCX;
11675 assert(Subtarget.hasPartwordAtomics() && "No support for partword atomics.");
11676 break;
11677 case PPC::ATOMIC_CMP_SWAP_I16:
11678 LoadMnemonic = PPC::LHARX;
11679 StoreMnemonic = PPC::STHCX;
11680 assert(Subtarget.hasPartwordAtomics() && "No support for partword atomics.");
11681 break;
11682 case PPC::ATOMIC_CMP_SWAP_I32:
11683 LoadMnemonic = PPC::LWARX;
11684 StoreMnemonic = PPC::STWCX;
11685 break;
11686 case PPC::ATOMIC_CMP_SWAP_I64:
11687 LoadMnemonic = PPC::LDARX;
11688 StoreMnemonic = PPC::STDCX;
11689 break;
11690 }
11691 Register dest = MI.getOperand(0).getReg();
11692 Register ptrA = MI.getOperand(1).getReg();
11693 Register ptrB = MI.getOperand(2).getReg();
11694 Register oldval = MI.getOperand(3).getReg();
11695 Register newval = MI.getOperand(4).getReg();
11696 DebugLoc dl = MI.getDebugLoc();
11697
11698 MachineBasicBlock *loop1MBB = F->CreateMachineBasicBlock(LLVM_BB);
11699 MachineBasicBlock *loop2MBB = F->CreateMachineBasicBlock(LLVM_BB);
11700 MachineBasicBlock *midMBB = F->CreateMachineBasicBlock(LLVM_BB);
11701 MachineBasicBlock *exitMBB = F->CreateMachineBasicBlock(LLVM_BB);
11702 F->insert(It, loop1MBB);
11703 F->insert(It, loop2MBB);
11704 F->insert(It, midMBB);
11705 F->insert(It, exitMBB);
11706 exitMBB->splice(exitMBB->begin(), BB,
11707 std::next(MachineBasicBlock::iterator(MI)), BB->end());
11708 exitMBB->transferSuccessorsAndUpdatePHIs(BB);
11709
11710 // thisMBB:
11711 // ...
11712 // fallthrough --> loop1MBB
11713 BB->addSuccessor(loop1MBB);
11714
11715 // loop1MBB:
11716 // l[bhwd]arx dest, ptr
11717 // cmp[wd] dest, oldval
11718 // bne- midMBB
11719 // loop2MBB:
11720 // st[bhwd]cx. newval, ptr
11721 // bne- loop1MBB
11722 // b exitBB
11723 // midMBB:
11724 // st[bhwd]cx. dest, ptr
11725 // exitBB:
11726 BB = loop1MBB;
11727 BuildMI(BB, dl, TII->get(LoadMnemonic), dest).addReg(ptrA).addReg(ptrB);
11728 BuildMI(BB, dl, TII->get(is64bit ? PPC::CMPD : PPC::CMPW), PPC::CR0)
11729 .addReg(oldval)
11730 .addReg(dest);
11731 BuildMI(BB, dl, TII->get(PPC::BCC))
11732 .addImm(PPC::PRED_NE)
11733 .addReg(PPC::CR0)
11734 .addMBB(midMBB);
11735 BB->addSuccessor(loop2MBB);
11736 BB->addSuccessor(midMBB);
11737
11738 BB = loop2MBB;
11739 BuildMI(BB, dl, TII->get(StoreMnemonic))
11740 .addReg(newval)
11741 .addReg(ptrA)
11742 .addReg(ptrB);
11743 BuildMI(BB, dl, TII->get(PPC::BCC))
11744 .addImm(PPC::PRED_NE)
11745 .addReg(PPC::CR0)
11746 .addMBB(loop1MBB);
11747 BuildMI(BB, dl, TII->get(PPC::B)).addMBB(exitMBB);
11748 BB->addSuccessor(loop1MBB);
11749 BB->addSuccessor(exitMBB);
11750
11751 BB = midMBB;
11752 BuildMI(BB, dl, TII->get(StoreMnemonic))
11753 .addReg(dest)
11754 .addReg(ptrA)
11755 .addReg(ptrB);
11756 BB->addSuccessor(exitMBB);
11757
11758 // exitMBB:
11759 // ...
11760 BB = exitMBB;
11761 } else if (MI.getOpcode() == PPC::ATOMIC_CMP_SWAP_I8 ||
11762 MI.getOpcode() == PPC::ATOMIC_CMP_SWAP_I16) {
11763 // We must use 64-bit registers for addresses when targeting 64-bit,
11764 // since we're actually doing arithmetic on them. Other registers
11765 // can be 32-bit.
11766 bool is64bit = Subtarget.isPPC64();
11767 bool isLittleEndian = Subtarget.isLittleEndian();
11768 bool is8bit = MI.getOpcode() == PPC::ATOMIC_CMP_SWAP_I8;
11769
11770 Register dest = MI.getOperand(0).getReg();
11771 Register ptrA = MI.getOperand(1).getReg();
11772 Register ptrB = MI.getOperand(2).getReg();
11773 Register oldval = MI.getOperand(3).getReg();
11774 Register newval = MI.getOperand(4).getReg();
11775 DebugLoc dl = MI.getDebugLoc();
11776
11777 MachineBasicBlock *loop1MBB = F->CreateMachineBasicBlock(LLVM_BB);
11778 MachineBasicBlock *loop2MBB = F->CreateMachineBasicBlock(LLVM_BB);
11779 MachineBasicBlock *midMBB = F->CreateMachineBasicBlock(LLVM_BB);
11780 MachineBasicBlock *exitMBB = F->CreateMachineBasicBlock(LLVM_BB);
11781 F->insert(It, loop1MBB);
11782 F->insert(It, loop2MBB);
11783 F->insert(It, midMBB);
11784 F->insert(It, exitMBB);
11785 exitMBB->splice(exitMBB->begin(), BB,
11786 std::next(MachineBasicBlock::iterator(MI)), BB->end());
11787 exitMBB->transferSuccessorsAndUpdatePHIs(BB);
11788
11789 MachineRegisterInfo &RegInfo = F->getRegInfo();
11790 const TargetRegisterClass *RC =
11791 is64bit ? &PPC::G8RCRegClass : &PPC::GPRCRegClass;
11792 const TargetRegisterClass *GPRC = &PPC::GPRCRegClass;
11793
11794 Register PtrReg = RegInfo.createVirtualRegister(RC);
11795 Register Shift1Reg = RegInfo.createVirtualRegister(GPRC);
11796 Register ShiftReg =
11797 isLittleEndian ? Shift1Reg : RegInfo.createVirtualRegister(GPRC);
11798 Register NewVal2Reg = RegInfo.createVirtualRegister(GPRC);
11799 Register NewVal3Reg = RegInfo.createVirtualRegister(GPRC);
11800 Register OldVal2Reg = RegInfo.createVirtualRegister(GPRC);
11801 Register OldVal3Reg = RegInfo.createVirtualRegister(GPRC);
11802 Register MaskReg = RegInfo.createVirtualRegister(GPRC);
11803 Register Mask2Reg = RegInfo.createVirtualRegister(GPRC);
11804 Register Mask3Reg = RegInfo.createVirtualRegister(GPRC);
11805 Register Tmp2Reg = RegInfo.createVirtualRegister(GPRC);
11806 Register Tmp4Reg = RegInfo.createVirtualRegister(GPRC);
11807 Register TmpDestReg = RegInfo.createVirtualRegister(GPRC);
11808 Register Ptr1Reg;
11809 Register TmpReg = RegInfo.createVirtualRegister(GPRC);
11810 Register ZeroReg = is64bit ? PPC::ZERO8 : PPC::ZERO;
11811 // thisMBB:
11812 // ...
11813 // fallthrough --> loop1MBB
11814 BB->addSuccessor(loop1MBB);
11815
11816 // The 4-byte load must be aligned, while a char or short may be
11817 // anywhere in the word. Hence all this nasty bookkeeping code.
11818 // add ptr1, ptrA, ptrB [copy if ptrA==0]
11819 // rlwinm shift1, ptr1, 3, 27, 28 [3, 27, 27]
11820 // xori shift, shift1, 24 [16]
11821 // rlwinm ptr, ptr1, 0, 0, 29
11822 // slw newval2, newval, shift
11823 // slw oldval2, oldval,shift
11824 // li mask2, 255 [li mask3, 0; ori mask2, mask3, 65535]
11825 // slw mask, mask2, shift
11826 // and newval3, newval2, mask
11827 // and oldval3, oldval2, mask
11828 // loop1MBB:
11829 // lwarx tmpDest, ptr
11830 // and tmp, tmpDest, mask
11831 // cmpw tmp, oldval3
11832 // bne- midMBB
11833 // loop2MBB:
11834 // andc tmp2, tmpDest, mask
11835 // or tmp4, tmp2, newval3
11836 // stwcx. tmp4, ptr
11837 // bne- loop1MBB
11838 // b exitBB
11839 // midMBB:
11840 // stwcx. tmpDest, ptr
11841 // exitBB:
11842 // srw dest, tmpDest, shift
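// Worked example of the shift bookkeeping (big-endian, is8bit): for a byte
// at address 0x1003, ptr1 & 3 == 3, so the rlwinm produces shift1 = 24 and
// the xori gives shift = 24 ^ 24 = 0; the byte at the highest offset is the
// least significant lane of the word and needs no shift. At address 0x1000,
// shift1 = 0 and shift = 24, placing the value in bits 0:7. On little-endian
// targets shift == shift1, since byte offset and lane order coincide.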
11843 if (ptrA != ZeroReg) {
11844 Ptr1Reg = RegInfo.createVirtualRegister(RC);
11845 BuildMI(BB, dl, TII->get(is64bit ? PPC::ADD8 : PPC::ADD4), Ptr1Reg)
11846 .addReg(ptrA)
11847 .addReg(ptrB);
11848 } else {
11849 Ptr1Reg = ptrB;
11850 }
11851
11852 // We need to use a 32-bit subregister here, to avoid a register class
11853 // mismatch in 64-bit mode.
11854 BuildMI(BB, dl, TII->get(PPC::RLWINM), Shift1Reg)
11855 .addReg(Ptr1Reg, 0, is64bit ? PPC::sub_32 : 0)
11856 .addImm(3)
11857 .addImm(27)
11858 .addImm(is8bit ? 28 : 27);
11859 if (!isLittleEndian)
11860 BuildMI(BB, dl, TII->get(PPC::XORI), ShiftReg)
11861 .addReg(Shift1Reg)
11862 .addImm(is8bit ? 24 : 16);
11863 if (is64bit)
11864 BuildMI(BB, dl, TII->get(PPC::RLDICR), PtrReg)
11865 .addReg(Ptr1Reg)
11866 .addImm(0)
11867 .addImm(61);
11868 else
11869 BuildMI(BB, dl, TII->get(PPC::RLWINM), PtrReg)
11870 .addReg(Ptr1Reg)
11871 .addImm(0)
11872 .addImm(0)
11873 .addImm(29);
11874 BuildMI(BB, dl, TII->get(PPC::SLW), NewVal2Reg)
11875 .addReg(newval)
11876 .addReg(ShiftReg);
11877 BuildMI(BB, dl, TII->get(PPC::SLW), OldVal2Reg)
11878 .addReg(oldval)
11879 .addReg(ShiftReg);
11880 if (is8bit)
11881 BuildMI(BB, dl, TII->get(PPC::LI), Mask2Reg).addImm(255);
11882 else {
11883 BuildMI(BB, dl, TII->get(PPC::LI), Mask3Reg).addImm(0);
11884 BuildMI(BB, dl, TII->get(PPC::ORI), Mask2Reg)
11885 .addReg(Mask3Reg)
11886 .addImm(65535);
11887 }
11888 BuildMI(BB, dl, TII->get(PPC::SLW), MaskReg)
11889 .addReg(Mask2Reg)
11890 .addReg(ShiftReg);
11891 BuildMI(BB, dl, TII->get(PPC::AND), NewVal3Reg)
11892 .addReg(NewVal2Reg)
11893 .addReg(MaskReg);
11894 BuildMI(BB, dl, TII->get(PPC::AND), OldVal3Reg)
11895 .addReg(OldVal2Reg)
11896 .addReg(MaskReg);
11897
11898 BB = loop1MBB;
11899 BuildMI(BB, dl, TII->get(PPC::LWARX), TmpDestReg)
11900 .addReg(ZeroReg)
11901 .addReg(PtrReg);
11902 BuildMI(BB, dl, TII->get(PPC::AND), TmpReg)
11903 .addReg(TmpDestReg)
11904 .addReg(MaskReg);
11905 BuildMI(BB, dl, TII->get(PPC::CMPW), PPC::CR0)
11906 .addReg(TmpReg)
11907 .addReg(OldVal3Reg);
11908 BuildMI(BB, dl, TII->get(PPC::BCC))
11909 .addImm(PPC::PRED_NE)
11910 .addReg(PPC::CR0)
11911 .addMBB(midMBB);
11912 BB->addSuccessor(loop2MBB);
11913 BB->addSuccessor(midMBB);
11914
11915 BB = loop2MBB;
11916 BuildMI(BB, dl, TII->get(PPC::ANDC), Tmp2Reg)
11917 .addReg(TmpDestReg)
11918 .addReg(MaskReg);
11919 BuildMI(BB, dl, TII->get(PPC::OR), Tmp4Reg)
11920 .addReg(Tmp2Reg)
11921 .addReg(NewVal3Reg);
11922 BuildMI(BB, dl, TII->get(PPC::STWCX))
11923 .addReg(Tmp4Reg)
11924 .addReg(ZeroReg)
11925 .addReg(PtrReg);
11926 BuildMI(BB, dl, TII->get(PPC::BCC))
11927 .addImm(PPC::PRED_NE)
11928 .addReg(PPC::CR0)
11929 .addMBB(loop1MBB);
11930 BuildMI(BB, dl, TII->get(PPC::B)).addMBB(exitMBB);
11931 BB->addSuccessor(loop1MBB);
11932 BB->addSuccessor(exitMBB);
11933
11934 BB = midMBB;
11935 BuildMI(BB, dl, TII->get(PPC::STWCX))
11936 .addReg(TmpDestReg)
11937 .addReg(ZeroReg)
11938 .addReg(PtrReg);
11939 BB->addSuccessor(exitMBB);
11940
11941 // exitMBB:
11942 // ...
11943 BB = exitMBB;
11944 BuildMI(*BB, BB->begin(), dl, TII->get(PPC::SRW), dest)
11945 .addReg(TmpReg)
11946 .addReg(ShiftReg);
11947 } else if (MI.getOpcode() == PPC::FADDrtz) {
11948 // This pseudo performs an FADD with rounding mode temporarily forced
11949 // to round-to-zero. We emit this via custom inserter since the FPSCR
11950 // is not modeled at the SelectionDAG level.
11951 Register Dest = MI.getOperand(0).getReg();
11952 Register Src1 = MI.getOperand(1).getReg();
11953 Register Src2 = MI.getOperand(2).getReg();
11954 DebugLoc dl = MI.getDebugLoc();
11955
11956 MachineRegisterInfo &RegInfo = F->getRegInfo();
11957 Register MFFSReg = RegInfo.createVirtualRegister(&PPC::F8RCRegClass);
11958
11959 // Save FPSCR value.
11960 BuildMI(*BB, MI, dl, TII->get(PPC::MFFS), MFFSReg);
11961
11962 // Set rounding mode to round-to-zero.
11963 BuildMI(*BB, MI, dl, TII->get(PPC::MTFSB1))
11964 .addImm(31)
11965 .addReg(PPC::RM, RegState::ImplicitDefine);
11966
11967 BuildMI(*BB, MI, dl, TII->get(PPC::MTFSB0))
11968 .addImm(30)
11969 .addReg(PPC::RM, RegState::ImplicitDefine);
11970
11971 // Perform addition.
11972 auto MIB = BuildMI(*BB, MI, dl, TII->get(PPC::FADD), Dest)
11973 .addReg(Src1)
11974 .addReg(Src2);
11975 if (MI.getFlag(MachineInstr::NoFPExcept))
11976 MIB.setMIFlag(MachineInstr::NoFPExcept);
11977
11978 // Restore FPSCR value.
11979 BuildMI(*BB, MI, dl, TII->get(PPC::MTFSFb)).addImm(1).addReg(MFFSReg);
11980 } else if (MI.getOpcode() == PPC::ANDI_rec_1_EQ_BIT ||
11981 MI.getOpcode() == PPC::ANDI_rec_1_GT_BIT ||
11982 MI.getOpcode() == PPC::ANDI_rec_1_EQ_BIT8 ||
11983 MI.getOpcode() == PPC::ANDI_rec_1_GT_BIT8) {
11984 unsigned Opcode = (MI.getOpcode() == PPC::ANDI_rec_1_EQ_BIT8 ||
11985 MI.getOpcode() == PPC::ANDI_rec_1_GT_BIT8)
11986 ? PPC::ANDI8_rec
11987 : PPC::ANDI_rec;
11988 bool IsEQ = (MI.getOpcode() == PPC::ANDI_rec_1_EQ_BIT ||
11989 MI.getOpcode() == PPC::ANDI_rec_1_EQ_BIT8);
11990
11991 MachineRegisterInfo &RegInfo = F->getRegInfo();
11992 Register Dest = RegInfo.createVirtualRegister(
11993 Opcode == PPC::ANDI_rec ? &PPC::GPRCRegClass : &PPC::G8RCRegClass);
11994
11995 DebugLoc Dl = MI.getDebugLoc();
11996 BuildMI(*BB, MI, Dl, TII->get(Opcode), Dest)
11997 .addReg(MI.getOperand(1).getReg())
11998 .addImm(1);
11999 BuildMI(*BB, MI, Dl, TII->get(TargetOpcode::COPY),
12000 MI.getOperand(0).getReg())
12001 .addReg(IsEQ ? PPC::CR0EQ : PPC::CR0GT);
12002 } else if (MI.getOpcode() == PPC::TCHECK_RET) {
12003 DebugLoc Dl = MI.getDebugLoc();
12004 MachineRegisterInfo &RegInfo = F->getRegInfo();
12005 Register CRReg = RegInfo.createVirtualRegister(&PPC::CRRCRegClass);
12006 BuildMI(*BB, MI, Dl, TII->get(PPC::TCHECK), CRReg);
12007 BuildMI(*BB, MI, Dl, TII->get(TargetOpcode::COPY),
12008 MI.getOperand(0).getReg())
12009 .addReg(CRReg);
12010 } else if (MI.getOpcode() == PPC::TBEGIN_RET) {
12011 DebugLoc Dl = MI.getDebugLoc();
12012 unsigned Imm = MI.getOperand(1).getImm();
12013 BuildMI(*BB, MI, Dl, TII->get(PPC::TBEGIN)).addImm(Imm);
12014 BuildMI(*BB, MI, Dl, TII->get(TargetOpcode::COPY),
12015 MI.getOperand(0).getReg())
12016 .addReg(PPC::CR0EQ);
12017 } else if (MI.getOpcode() == PPC::SETRNDi) {
12018 DebugLoc dl = MI.getDebugLoc();
12019 Register OldFPSCRReg = MI.getOperand(0).getReg();
12020
12021 // Save FPSCR value.
12022 BuildMI(*BB, MI, dl, TII->get(PPC::MFFS), OldFPSCRReg);
12023
12024 // The floating point rounding mode is in the bits 62:63 of FPCSR, and has
12025 // the following settings:
12026 // 00 Round to nearest
12027 // 01 Round to 0
12028 // 10 Round to +inf
12029 // 11 Round to -inf
12030
12031 // When the operand is an immediate, use its two least significant bits to
12032 // set bits 62:63 of FPSCR.
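// For example, Mode = 2 (round to +inf) emits MTFSB0 with immediate 31
// (Mode & 1 == 0, clearing FPSCR bit 63) and MTFSB1 with immediate 30
// (Mode & 2 != 0, setting FPSCR bit 62), yielding the 10 encoding above.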
12033 unsigned Mode = MI.getOperand(1).getImm();
12034 BuildMI(*BB, MI, dl, TII->get((Mode & 1) ? PPC::MTFSB1 : PPC::MTFSB0))
12035 .addImm(31)
12036 .addReg(PPC::RM, RegState::ImplicitDefine);
12037
12038 BuildMI(*BB, MI, dl, TII->get((Mode & 2) ? PPC::MTFSB1 : PPC::MTFSB0))
12039 .addImm(30)
12040 .addReg(PPC::RM, RegState::ImplicitDefine);
12041 } else if (MI.getOpcode() == PPC::SETRND) {
12042 DebugLoc dl = MI.getDebugLoc();
12043
12044 // Copy register from F8RCRegClass::SrcReg to G8RCRegClass::DestReg
12045 // or copy register from G8RCRegClass::SrcReg to F8RCRegClass::DestReg.
12046 // If the target doesn't have DirectMove, we should go through the stack to
12047 // do the conversion, because the target lacks instructions like mtvsrd or
12048 // mfvsrd to do it directly.
12049 auto copyRegFromG8RCOrF8RC = [&] (unsigned DestReg, unsigned SrcReg) {
12050 if (Subtarget.hasDirectMove()) {
12051 BuildMI(*BB, MI, dl, TII->get(TargetOpcode::COPY), DestReg)
12052 .addReg(SrcReg);
12053 } else {
12054 // Use stack to do the register copy.
12055 unsigned StoreOp = PPC::STD, LoadOp = PPC::LFD;
12056 MachineRegisterInfo &RegInfo = F->getRegInfo();
12057 const TargetRegisterClass *RC = RegInfo.getRegClass(SrcReg);
12058 if (RC == &PPC::F8RCRegClass) {
12059 // Copy register from F8RCRegClass to G8RCRegClass.
12060 assert((RegInfo.getRegClass(DestReg) == &PPC::G8RCRegClass) &&
12061 "Unsupported RegClass.");
12062
12063 StoreOp = PPC::STFD;
12064 LoadOp = PPC::LD;
12065 } else {
12066 // Copy register from G8RCRegClass to F8RCRegClass.
12067 assert((RegInfo.getRegClass(SrcReg) == &PPC::G8RCRegClass) &&
12068 (RegInfo.getRegClass(DestReg) == &PPC::F8RCRegClass) &&
12069 "Unsupported RegClass.");
12070 }
12071
12072 MachineFrameInfo &MFI = F->getFrameInfo();
12073 int FrameIdx = MFI.CreateStackObject(8, Align(8), false);
12074
12075 MachineMemOperand *MMOStore = F->getMachineMemOperand(
12076 MachinePointerInfo::getFixedStack(*F, FrameIdx, 0),
12077 MachineMemOperand::MOStore, MFI.getObjectSize(FrameIdx),
12078 MFI.getObjectAlign(FrameIdx));
12079
12080 // Store the SrcReg into the stack.
12081 BuildMI(*BB, MI, dl, TII->get(StoreOp))
12082 .addReg(SrcReg)
12083 .addImm(0)
12084 .addFrameIndex(FrameIdx)
12085 .addMemOperand(MMOStore);
12086
12087 MachineMemOperand *MMOLoad = F->getMachineMemOperand(
12088 MachinePointerInfo::getFixedStack(*F, FrameIdx, 0),
12089 MachineMemOperand::MOLoad, MFI.getObjectSize(FrameIdx),
12090 MFI.getObjectAlign(FrameIdx));
12091
12092 // Load from the stack slot where SrcReg was stored into DestReg, which
12093 // completes the register class conversion from the class of SrcReg to the
12094 // class of DestReg.
12095 BuildMI(*BB, MI, dl, TII->get(LoadOp), DestReg)
12096 .addImm(0)
12097 .addFrameIndex(FrameIdx)
12098 .addMemOperand(MMOLoad);
12099 }
12100 };
12101
12102 Register OldFPSCRReg = MI.getOperand(0).getReg();
12103
12104 // Save FPSCR value.
12105 BuildMI(*BB, MI, dl, TII->get(PPC::MFFS), OldFPSCRReg);
12106
12107 // When the operand is a gprc register, use its two least significant bits
12108 // and the mtfsf instruction to set bits 62:63 of FPSCR.
12109 //
12110 // copy OldFPSCRTmpReg, OldFPSCRReg
12111 // (INSERT_SUBREG ExtSrcReg, (IMPLICIT_DEF ImDefReg), SrcOp, 1)
12112 // rldimi NewFPSCRTmpReg, ExtSrcReg, OldFPSCRReg, 0, 62
12113 // copy NewFPSCRReg, NewFPSCRTmpReg
12114 // mtfsf 255, NewFPSCRReg
12115 MachineOperand SrcOp = MI.getOperand(1);
12116 MachineRegisterInfo &RegInfo = F->getRegInfo();
12117 Register OldFPSCRTmpReg = RegInfo.createVirtualRegister(&PPC::G8RCRegClass);
12118
12119 copyRegFromG8RCOrF8RC(OldFPSCRTmpReg, OldFPSCRReg);
12120
12121 Register ImDefReg = RegInfo.createVirtualRegister(&PPC::G8RCRegClass);
12122 Register ExtSrcReg = RegInfo.createVirtualRegister(&PPC::G8RCRegClass);
12123
12124 // The first operand of INSERT_SUBREG should be a register that has
12125 // subregisters. Since we only care about its RegClass, we use an
12126 // IMPLICIT_DEF register.
12127 BuildMI(*BB, MI, dl, TII->get(TargetOpcode::IMPLICIT_DEF), ImDefReg);
12128 BuildMI(*BB, MI, dl, TII->get(PPC::INSERT_SUBREG), ExtSrcReg)
12129 .addReg(ImDefReg)
12130 .add(SrcOp)
12131 .addImm(1);
12132
12133 Register NewFPSCRTmpReg = RegInfo.createVirtualRegister(&PPC::G8RCRegClass);
12134 BuildMI(*BB, MI, dl, TII->get(PPC::RLDIMI), NewFPSCRTmpReg)
12135 .addReg(OldFPSCRTmpReg)
12136 .addReg(ExtSrcReg)
12137 .addImm(0)
12138 .addImm(62);
12139
12140 Register NewFPSCRReg = RegInfo.createVirtualRegister(&PPC::F8RCRegClass);
12141 copyRegFromG8RCOrF8RC(NewFPSCRReg, NewFPSCRTmpReg);
12142
12143 // The mask 255 means to put bits 32:63 of NewFPSCRReg into bits 32:63
12144 // of FPSCR.
12145 BuildMI(*BB, MI, dl, TII->get(PPC::MTFSF))
12146 .addImm(255)
12147 .addReg(NewFPSCRReg)
12148 .addImm(0)
12149 .addImm(0);
12150 } else if (MI.getOpcode() == PPC::SETFLM) {
12151 DebugLoc Dl = MI.getDebugLoc();
12152
12153 // Result of setflm is previous FPSCR content, so we need to save it first.
12154 Register OldFPSCRReg = MI.getOperand(0).getReg();
12155 BuildMI(*BB, MI, Dl, TII->get(PPC::MFFS), OldFPSCRReg);
12156
12157 // Put bits in 32:63 to FPSCR.
12158 Register NewFPSCRReg = MI.getOperand(1).getReg();
12159 BuildMI(*BB, MI, Dl, TII->get(PPC::MTFSF))
12160 .addImm(255)
12161 .addReg(NewFPSCRReg)
12162 .addImm(0)
12163 .addImm(0);
12164 } else if (MI.getOpcode() == PPC::PROBED_ALLOCA_32 ||
12165 MI.getOpcode() == PPC::PROBED_ALLOCA_64) {
12166 return emitProbedAlloca(MI, BB);
12167 } else {
12168 llvm_unreachable("Unexpected instr type to insert");
12169 }
12170
12171 MI.eraseFromParent(); // The pseudo instruction is gone now.
12172 return BB;
12173}
12174
12175//===----------------------------------------------------------------------===//
12176// Target Optimization Hooks
12177//===----------------------------------------------------------------------===//
12178
12179static int getEstimateRefinementSteps(EVT VT, const PPCSubtarget &Subtarget) {
12180 // For the estimates, convergence is quadratic, so we essentially double the
12181 // number of digits correct after every iteration. For both FRE and FRSQRTE,
12182 // the minimum architected relative accuracy is 2^-5. When hasRecipPrec(),
12183 // this is 2^-14. IEEE float has 23 digits and double has 52 digits.
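// Worked accuracy budget: starting from 2^-5, each step squares the error,
// giving 2^-10, 2^-20, then 2^-40 after three steps, which covers the
// 23 fraction bits of float; one extra step (2^-80) covers the 52 bits of
// double. With hasRecipPrec(), 2^-14 needs one step (2^-28) for float and
// two (2^-56) for double.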
12184 int RefinementSteps = Subtarget.hasRecipPrec() ? 1 : 3;
12185 if (VT.getScalarType() == MVT::f64)
12186 RefinementSteps++;
12187 return RefinementSteps;
12188}
12189
12190SDValue PPCTargetLowering::getSqrtInputTest(SDValue Op, SelectionDAG &DAG,
12191 const DenormalMode &Mode) const {
12192 // We only have VSX Vector Test for software Square Root.
12193 EVT VT = Op.getValueType();
12194 if (!isTypeLegal(MVT::i1) ||
12195 (VT != MVT::f64 &&
12196 ((VT != MVT::v2f64 && VT != MVT::v4f32) || !Subtarget.hasVSX())))
12197 return TargetLowering::getSqrtInputTest(Op, DAG, Mode);
12198
12199 SDLoc DL(Op);
12200 // The output register of FTSQRT is CR field.
12201 SDValue FTSQRT = DAG.getNode(PPCISD::FTSQRT, DL, MVT::i32, Op);
12202 // ftsqrt BF,FRB
12203 // Let e_b be the unbiased exponent of the double-precision
12204 // floating-point operand in register FRB.
12205 // fe_flag is set to 1 if either of the following conditions occurs.
12206 // - The double-precision floating-point operand in register FRB is a zero,
12207 // a NaN, an infinity, or a negative value.
12208 // - e_b is less than or equal to -970.
12209 // Otherwise fe_flag is set to 0.
12210 // Both VSX and non-VSX versions would set the EQ bit in the CR if the number is
12211 // not eligible for iteration. (zero/negative/infinity/nan or unbiased
12212 // exponent is less than -970)
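// For example, a normal input such as 2.0 (e_b = 1) leaves fe_flag at 0,
// so EQ is clear and the estimate-and-refine path may run; 0.0, a NaN, or
// any value with e_b <= -970 sets EQ, marking it unsafe to iterate on.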
12213 SDValue SRIdxVal = DAG.getTargetConstant(PPC::sub_eq, DL, MVT::i32);
12214 return SDValue(DAG.getMachineNode(TargetOpcode::EXTRACT_SUBREG, DL, MVT::i1,
12215 FTSQRT, SRIdxVal),
12216 0);
12217}
12218
12219SDValue
12220PPCTargetLowering::getSqrtResultForDenormInput(SDValue Op,
12221 SelectionDAG &DAG) const {
12222 // We only have VSX Vector Square Root.
12223 EVT VT = Op.getValueType();
12224 if (VT != MVT::f64 &&
12225 ((VT != MVT::v2f64 && VT != MVT::v4f32) || !Subtarget.hasVSX()))
12226 return TargetLowering::getSqrtResultForDenormInput(Op, DAG);
12227
12228 return DAG.getNode(PPCISD::FSQRT, SDLoc(Op), VT, Op);
12229}
12230
12231SDValue PPCTargetLowering::getSqrtEstimate(SDValue Operand, SelectionDAG &DAG,
12232 int Enabled, int &RefinementSteps,
12233 bool &UseOneConstNR,
12234 bool Reciprocal) const {
12235 EVT VT = Operand.getValueType();
12236 if ((VT == MVT::f32 && Subtarget.hasFRSQRTES()) ||
12237 (VT == MVT::f64 && Subtarget.hasFRSQRTE()) ||
12238 (VT == MVT::v4f32 && Subtarget.hasAltivec()) ||
12239 (VT == MVT::v2f64 && Subtarget.hasVSX())) {
12240 if (RefinementSteps == ReciprocalEstimate::Unspecified)
12241 RefinementSteps = getEstimateRefinementSteps(VT, Subtarget);
12242
12243 // The Newton-Raphson computation with a single constant does not provide
12244 // enough accuracy on some CPUs.
12245 UseOneConstNR = !Subtarget.needsTwoConstNR();
12246 return DAG.getNode(PPCISD::FRSQRTE, SDLoc(Operand), VT, Operand);
12247 }
12248 return SDValue();
12249}
12250
12251SDValue PPCTargetLowering::getRecipEstimate(SDValue Operand, SelectionDAG &DAG,
12252 int Enabled,
12253 int &RefinementSteps) const {
12254 EVT VT = Operand.getValueType();
12255 if ((VT == MVT::f32 && Subtarget.hasFRES()) ||
12256 (VT == MVT::f64 && Subtarget.hasFRE()) ||
12257 (VT == MVT::v4f32 && Subtarget.hasAltivec()) ||
12258 (VT == MVT::v2f64 && Subtarget.hasVSX())) {
12259 if (RefinementSteps == ReciprocalEstimate::Unspecified)
12260 RefinementSteps = getEstimateRefinementSteps(VT, Subtarget);
12261 return DAG.getNode(PPCISD::FRE, SDLoc(Operand), VT, Operand);
12262 }
12263 return SDValue();
12264}
12265
12266unsigned PPCTargetLowering::combineRepeatedFPDivisors() const {
12267 // Note: This functionality is used only when unsafe-fp-math is enabled, and
12268 // on cores with reciprocal estimates (which are used when unsafe-fp-math is
12269 // enabled for division), this functionality is redundant with the default
12270 // combiner logic (once the division -> reciprocal/multiply transformation
12271 // has taken place). As a result, this matters more for older cores than for
12272 // newer ones.
12273
12274 // Combine multiple FDIVs with the same divisor into multiple FMULs by the
12275 // reciprocal if there are two or more FDIVs (for embedded cores with only
12276 // one FP pipeline) or three or more FDIVs (for generic OOO cores).
12277 switch (Subtarget.getCPUDirective()) {
12278 default:
12279 return 3;
12280 case PPC::DIR_440:
12281 case PPC::DIR_A2:
12282 case PPC::DIR_E500:
12283 case PPC::DIR_E500mc:
12284 case PPC::DIR_E5500:
12285 return 2;
12286 }
12287}
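// An illustrative sketch of the combine this threshold gates (not code from
// this file): on a generic core, three divisions by the same value, such as
//   x = a / d; y = b / d; z = c / d;
// are rewritten to
//   r = 1.0 / d; x = a * r; y = b * r; z = c * r;
// trading three expensive divides for one divide and three multiplies.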
12288
12289// isConsecutiveLSLoc needs to work even if all adds have not yet been
12290// collapsed, and so we need to look through chains of them.
12291 static void getBaseWithConstantOffset(SDValue Loc, SDValue &Base,
12292 int64_t& Offset, SelectionDAG &DAG) {
12293 if (DAG.isBaseWithConstantOffset(Loc)) {
12294 Base = Loc.getOperand(0);
12295 Offset += cast<ConstantSDNode>(Loc.getOperand(1))->getSExtValue();
12296
12297 // The base might itself be a base plus an offset, and if so, accumulate
12298 // that as well.
12299 getBaseWithConstantOffset(Loc.getOperand(0), Base, Offset, DAG);
12300 }
12301}
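// For example, (add (add X, 8), 4) decomposes to Base = X with the Offset
// accumulated to 12, even before the combiner has collapsed the two adds.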
12302
12303static bool isConsecutiveLSLoc(SDValue Loc, EVT VT, LSBaseSDNode *Base,
12304 unsigned Bytes, int Dist,
12305 SelectionDAG &DAG) {
12306 if (VT.getSizeInBits() / 8 != Bytes)
12307 return false;
12308
12309 SDValue BaseLoc = Base->getBasePtr();
12310 if (Loc.getOpcode() == ISD::FrameIndex) {
12311 if (BaseLoc.getOpcode() != ISD::FrameIndex)
12312 return false;
12313 const MachineFrameInfo &MFI = DAG.getMachineFunction().getFrameInfo();
12314 int FI = cast<FrameIndexSDNode>(Loc)->getIndex();
12315 int BFI = cast<FrameIndexSDNode>(BaseLoc)->getIndex();
12316 int FS = MFI.getObjectSize(FI);
12317 int BFS = MFI.getObjectSize(BFI);
12318 if (FS != BFS || FS != (int)Bytes) return false;
12319 return MFI.getObjectOffset(FI) == (MFI.getObjectOffset(BFI) + Dist*Bytes);
12320 }
12321
12322 SDValue Base1 = Loc, Base2 = BaseLoc;
12323 int64_t Offset1 = 0, Offset2 = 0;
12324 getBaseWithConstantOffset(Loc, Base1, Offset1, DAG);
12325 getBaseWithConstantOffset(BaseLoc, Base2, Offset2, DAG);
12326 if (Base1 == Base2 && Offset1 == (Offset2 + Dist * Bytes))
12327 return true;
12328
12329 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
12330 const GlobalValue *GV1 = nullptr;
12331 const GlobalValue *GV2 = nullptr;
12332 Offset1 = 0;
12333 Offset2 = 0;
12334 bool isGA1 = TLI.isGAPlusOffset(Loc.getNode(), GV1, Offset1);
12335 bool isGA2 = TLI.isGAPlusOffset(BaseLoc.getNode(), GV2, Offset2);
12336 if (isGA1 && isGA2 && GV1 == GV2)
12337 return Offset1 == (Offset2 + Dist*Bytes);
12338 return false;
12339}
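// For example, with Bytes = 16 and Dist = 1, a load of (X, 16) is consecutive
// after a load of (X, 0): both decompose to base X, and the offsets satisfy
// Offset1 == Offset2 + Dist * Bytes.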
12340
12341// Like SelectionDAG::isConsecutiveLoad, but also works for stores, and does
12342// not enforce equality of the chain operands.
12343 static bool isConsecutiveLS(SDNode *N, LSBaseSDNode *Base,
12344 unsigned Bytes, int Dist,
12345 SelectionDAG &DAG) {
12346 if (LSBaseSDNode *LS = dyn_cast<LSBaseSDNode>(N)) {
12347 EVT VT = LS->getMemoryVT();
12348 SDValue Loc = LS->getBasePtr();
12349 return isConsecutiveLSLoc(Loc, VT, Base, Bytes, Dist, DAG);
12350 }
12351
12352 if (N->getOpcode() == ISD::INTRINSIC_W_CHAIN) {
12353 EVT VT;
12354 switch (cast<ConstantSDNode>(N->getOperand(1))->getZExtValue()) {
12355 default: return false;
12356 case Intrinsic::ppc_altivec_lvx:
12357 case Intrinsic::ppc_altivec_lvxl:
12358 case Intrinsic::ppc_vsx_lxvw4x:
12359 case Intrinsic::ppc_vsx_lxvw4x_be:
12360 VT = MVT::v4i32;
12361 break;
12362 case Intrinsic::ppc_vsx_lxvd2x:
12363 case Intrinsic::ppc_vsx_lxvd2x_be:
12364 VT = MVT::v2f64;
12365 break;
12366 case Intrinsic::ppc_altivec_lvebx:
12367 VT = MVT::i8;
12368 break;
12369 case Intrinsic::ppc_altivec_lvehx:
12370 VT = MVT::i16;
12371 break;
12372 case Intrinsic::ppc_altivec_lvewx:
12373 VT = MVT::i32;
12374 break;
12375 }
12376
12377 return isConsecutiveLSLoc(N->getOperand(2), VT, Base, Bytes, Dist, DAG);
12378 }
12379
12380 if (N->getOpcode() == ISD::INTRINSIC_VOID) {
12381 EVT VT;
12382 switch (cast<ConstantSDNode>(N->getOperand(1))->getZExtValue()) {
12383 default: return false;
12384 case Intrinsic::ppc_altivec_stvx:
12385 case Intrinsic::ppc_altivec_stvxl:
12386 case Intrinsic::ppc_vsx_stxvw4x:
12387 VT = MVT::v4i32;
12388 break;
12389 case Intrinsic::ppc_vsx_stxvd2x:
12390 VT = MVT::v2f64;
12391 break;
12392 case Intrinsic::ppc_vsx_stxvw4x_be:
12393 VT = MVT::v4i32;
12394 break;
12395 case Intrinsic::ppc_vsx_stxvd2x_be:
12396 VT = MVT::v2f64;
12397 break;
12398 case Intrinsic::ppc_altivec_stvebx:
12399 VT = MVT::i8;
12400 break;
12401 case Intrinsic::ppc_altivec_stvehx:
12402 VT = MVT::i16;
12403 break;
12404 case Intrinsic::ppc_altivec_stvewx:
12405 VT = MVT::i32;
12406 break;
12407 }
12408
12409 return isConsecutiveLSLoc(N->getOperand(3), VT, Base, Bytes, Dist, DAG);
12410 }
12411
12412 return false;
12413}
12414
12415 // Return true if there is a nearby consecutive load to the one provided
12416 // (regardless of alignment). We search up and down the chain, looking through
12417// token factors and other loads (but nothing else). As a result, a true result
12418// indicates that it is safe to create a new consecutive load adjacent to the
12419// load provided.
12420 static bool findConsecutiveLoad(LoadSDNode *LD, SelectionDAG &DAG) {
12421 SDValue Chain = LD->getChain();
12422 EVT VT = LD->getMemoryVT();
12423
12424 SmallSet<SDNode *, 16> LoadRoots;
12425 SmallVector<SDNode *, 8> Queue(1, Chain.getNode());
12426 SmallSet<SDNode *, 16> Visited;
12427
12428 // First, search up the chain, branching to follow all token-factor operands.
12429 // If we find a consecutive load, then we're done, otherwise, record all
12430 // nodes just above the top-level loads and token factors.
12431 while (!Queue.empty()) {
12432 SDNode *ChainNext = Queue.pop_back_val();
12433 if (!Visited.insert(ChainNext).second)
12434 continue;
12435
12436 if (MemSDNode *ChainLD = dyn_cast<MemSDNode>(ChainNext)) {
12437 if (isConsecutiveLS(ChainLD, LD, VT.getStoreSize(), 1, DAG))
12438 return true;
12439
12440 if (!Visited.count(ChainLD->getChain().getNode()))
12441 Queue.push_back(ChainLD->getChain().getNode());
12442 } else if (ChainNext->getOpcode() == ISD::TokenFactor) {
12443 for (const SDUse &O : ChainNext->ops())
12444 if (!Visited.count(O.getNode()))
12445 Queue.push_back(O.getNode());
12446 } else
12447 LoadRoots.insert(ChainNext);
12448 }
12449
12450 // Second, search down the chain, starting from the top-level nodes recorded
12451 // in the first phase. These top-level nodes are the nodes just above all
12452 // loads and token factors. Starting with their uses, recursively look through
12453 // all loads (just the chain uses) and token factors to find a consecutive
12454 // load.
12455 Visited.clear();
12456 Queue.clear();
12457
12458 for (SmallSet<SDNode *, 16>::iterator I = LoadRoots.begin(),
12459 IE = LoadRoots.end(); I != IE; ++I) {
12460 Queue.push_back(*I);
12461
12462 while (!Queue.empty()) {
12463 SDNode *LoadRoot = Queue.pop_back_val();
12464 if (!Visited.insert(LoadRoot).second)
12465 continue;
12466
12467 if (MemSDNode *ChainLD = dyn_cast<MemSDNode>(LoadRoot))
12468 if (isConsecutiveLS(ChainLD, LD, VT.getStoreSize(), 1, DAG))
12469 return true;
12470
12471 for (SDNode::use_iterator UI = LoadRoot->use_begin(),
12472 UE = LoadRoot->use_end(); UI != UE; ++UI)
12473 if (((isa<MemSDNode>(*UI) &&
12474 cast<MemSDNode>(*UI)->getChain().getNode() == LoadRoot) ||
12475 UI->getOpcode() == ISD::TokenFactor) && !Visited.count(*UI))
12476 Queue.push_back(*UI);
12477 }
12478 }
12479
12480 return false;
12481}
12482
12483/// This function is called when we have proved that a SETCC node can be replaced
12484/// by subtraction (and other supporting instructions) so that the result of
12485 /// the comparison is kept in a GPR instead of a CR. This function is purely for
12486/// codegen purposes and has some flags to guide the codegen process.
12487static SDValue generateEquivalentSub(SDNode *N, int Size, bool Complement,
12488 bool Swap, SDLoc &DL, SelectionDAG &DAG) {
12489 assert(N->getOpcode() == ISD::SETCC && "ISD::SETCC Expected.");
12490
12491 // Zero extend the operands to the largest legal integer. The original
12492 // operands must be of a strictly smaller size.
12493 auto Op0 = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i64, N->getOperand(0),
12494 DAG.getConstant(Size, DL, MVT::i32));
12495 auto Op1 = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i64, N->getOperand(1),
12496 DAG.getConstant(Size, DL, MVT::i32));
12497
12498 // Swap if needed. Depends on the condition code.
12499 if (Swap)
12500 std::swap(Op0, Op1);
12501
12502 // Subtract extended integers.
12503 auto SubNode = DAG.getNode(ISD::SUB, DL, MVT::i64, Op0, Op1);
12504
12505 // Move the sign bit to the least significant position and zero out the rest.
12506 // Now the least significant bit carries the result of the original comparison.
12507 auto Shifted = DAG.getNode(ISD::SRL, DL, MVT::i64, SubNode,
12508 DAG.getConstant(Size - 1, DL, MVT::i32));
12509 auto Final = Shifted;
12510
12511 // Complement the result if needed. Based on the condition code.
12512 if (Complement)
12513 Final = DAG.getNode(ISD::XOR, DL, MVT::i64, Shifted,
12514 DAG.getConstant(1, DL, MVT::i64));
12515
12516 return DAG.getNode(ISD::TRUNCATE, DL, MVT::i1, Final);
12517}
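// Worked example (SETULT on i32 operands, Size = 64): both operands are zero
// extended, so Op0 - Op1 is negative exactly when Op0 < Op1 as unsigned
// values. Shifting right by Size - 1 = 63 leaves that sign bit in bit 0,
// which truncates to the i1 result; SETUGE complements it, and SETUGT/SETULE
// swap the operands first.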
12518
12519SDValue PPCTargetLowering::ConvertSETCCToSubtract(SDNode *N,
12520 DAGCombinerInfo &DCI) const {
12521 assert(N->getOpcode() == ISD::SETCC && "ISD::SETCC Expected.");
12522
12523 SelectionDAG &DAG = DCI.DAG;
12524 SDLoc DL(N);
12525
12526 // Size of integers being compared has a critical role in the following
12527 // analysis, so we prefer to do this when all types are legal.
12528 if (!DCI.isAfterLegalizeDAG())
12529 return SDValue();
12530
12531 // If all users of SETCC extend its value to a legal integer type
12532 // then we replace SETCC with a subtraction
12533 for (SDNode::use_iterator UI = N->use_begin(),
12534 UE = N->use_end(); UI != UE; ++UI) {
12535 if (UI->getOpcode() != ISD::ZERO_EXTEND)
12536 return SDValue();
12537 }
12538
12539 ISD::CondCode CC = cast<CondCodeSDNode>(N->getOperand(2))->get();
12540 auto OpSize = N->getOperand(0).getValueSizeInBits();
12541
12542 unsigned Size = DAG.getDataLayout().getLargestLegalIntTypeSizeInBits();
12543
12544 if (OpSize < Size) {
12545 switch (CC) {
12546 default: break;
12547 case ISD::SETULT:
12548 return generateEquivalentSub(N, Size, false, false, DL, DAG);
12549 case ISD::SETULE:
12550 return generateEquivalentSub(N, Size, true, true, DL, DAG);
12551 case ISD::SETUGT:
12552 return generateEquivalentSub(N, Size, false, true, DL, DAG);
12553 case ISD::SETUGE:
12554 return generateEquivalentSub(N, Size, true, false, DL, DAG);
12555 }
12556 }
12557
12558 return SDValue();
12559}
12560
12561SDValue PPCTargetLowering::DAGCombineTruncBoolExt(SDNode *N,
12562 DAGCombinerInfo &DCI) const {
12563 SelectionDAG &DAG = DCI.DAG;
12564 SDLoc dl(N);
12565
12566 assert(Subtarget.useCRBits() && "Expecting to be tracking CR bits");
12567 // If we're tracking CR bits, we need to be careful that we don't have:
12568 // trunc(binary-ops(zext(x), zext(y)))
12569 // or
12570 // trunc(binary-ops(binary-ops(zext(x), zext(y)), ...))
12571 // such that we're unnecessarily moving things into GPRs when it would be
12572 // better to keep them in CR bits.
12573
12574 // Note that trunc here can be an actual i1 trunc, or can be the effective
12575 // truncation that comes from a setcc or select_cc.
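// For example, with i1 values x and y, a pattern like
//   trunc (and (zext x), (zext y)) -> i1
// can instead perform the AND directly on the i1 inputs (CR bits), avoiding
// a round trip through a GPR.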
12576 if (N->getOpcode() == ISD::TRUNCATE &&
12577 N->getValueType(0) != MVT::i1)
12578 return SDValue();
12579
12580 if (N->getOperand(0).getValueType() != MVT::i32 &&
12581 N->getOperand(0).getValueType() != MVT::i64)
12582 return SDValue();
12583
12584 if (N->getOpcode() == ISD::SETCC ||
12585 N->getOpcode() == ISD::SELECT_CC) {
12586 // If we're looking at a comparison, then we need to make sure that the
12587 // high bits (all except for the first) don't affect the result.
12588 ISD::CondCode CC =
12589 cast<CondCodeSDNode>(N->getOperand(
12590 N->getOpcode() == ISD::SETCC ? 2 : 4))->get();
12591 unsigned OpBits = N->getOperand(0).getValueSizeInBits();
12592
12593 if (ISD::isSignedIntSetCC(CC)) {
12594 if (DAG.ComputeNumSignBits(N->getOperand(0)) != OpBits ||
12595 DAG.ComputeNumSignBits(N->getOperand(1)) != OpBits)
12596 return SDValue();
12597 } else if (ISD::isUnsignedIntSetCC(CC)) {
12598 if (!DAG.MaskedValueIsZero(N->getOperand(0),
12599 APInt::getHighBitsSet(OpBits, OpBits-1)) ||
12600 !DAG.MaskedValueIsZero(N->getOperand(1),
12601 APInt::getHighBitsSet(OpBits, OpBits-1)))
12602 return (N->getOpcode() == ISD::SETCC ? ConvertSETCCToSubtract(N, DCI)
12603 : SDValue());
12604 } else {
12605 // This is neither a signed nor an unsigned comparison, just make sure
12606 // that the high bits are equal.
12607 KnownBits Op1Known = DAG.computeKnownBits(N->getOperand(0));
12608 KnownBits Op2Known = DAG.computeKnownBits(N->getOperand(1));
12609
12610 // We don't really care about what is known about the first bit (if
12611 // anything), so pretend that it is known zero for both to ensure they can
12612 // be compared as constants.
12613 Op1Known.Zero.setBit(0); Op1Known.One.clearBit(0);
12614 Op2Known.Zero.setBit(0); Op2Known.One.clearBit(0);
12615
12616 if (!Op1Known.isConstant() || !Op2Known.isConstant() ||
12617 Op1Known.getConstant() != Op2Known.getConstant())
12618 return SDValue();
12619 }
12620 }
12621
12622 // We now know that the higher-order bits are irrelevant; we just need to
12623 // make sure that all of the intermediate operations are bit operations, and
12624 // all inputs are extensions.
12625 if (N->getOperand(0).getOpcode() != ISD::AND &&
12626 N->getOperand(0).getOpcode() != ISD::OR &&
12627 N->getOperand(0).getOpcode() != ISD::XOR &&
12628 N->getOperand(0).getOpcode() != ISD::SELECT &&
12629 N->getOperand(0).getOpcode() != ISD::SELECT_CC &&
12630 N->getOperand(0).getOpcode() != ISD::TRUNCATE &&
12631 N->getOperand(0).getOpcode() != ISD::SIGN_EXTEND &&
12632 N->getOperand(0).getOpcode() != ISD::ZERO_EXTEND &&
12633 N->getOperand(0).getOpcode() != ISD::ANY_EXTEND)
12634 return SDValue();
12635
12636 if ((N->getOpcode() == ISD::SETCC || N->getOpcode() == ISD::SELECT_CC) &&
12637 N->getOperand(1).getOpcode() != ISD::AND &&
12638 N->getOperand(1).getOpcode() != ISD::OR &&
12639 N->getOperand(1).getOpcode() != ISD::XOR &&
12640 N->getOperand(1).getOpcode() != ISD::SELECT &&
12641 N->getOperand(1).getOpcode() != ISD::SELECT_CC &&
12642 N->getOperand(1).getOpcode() != ISD::TRUNCATE &&
12643 N->getOperand(1).getOpcode() != ISD::SIGN_EXTEND &&
12644 N->getOperand(1).getOpcode() != ISD::ZERO_EXTEND &&
12645 N->getOperand(1).getOpcode() != ISD::ANY_EXTEND)
12646 return SDValue();
12647
12648 SmallVector<SDValue, 4> Inputs;
12649 SmallVector<SDValue, 8> BinOps, PromOps;
12650 SmallPtrSet<SDNode *, 16> Visited;
12651
12652 for (unsigned i = 0; i < 2; ++i) {
12653 if (((N->getOperand(i).getOpcode() == ISD::SIGN_EXTEND ||
12654 N->getOperand(i).getOpcode() == ISD::ZERO_EXTEND ||
12655 N->getOperand(i).getOpcode() == ISD::ANY_EXTEND) &&
12656 N->getOperand(i).getOperand(0).getValueType() == MVT::i1) ||
12657 isa<ConstantSDNode>(N->getOperand(i)))
12658 Inputs.push_back(N->getOperand(i));
12659 else
12660 BinOps.push_back(N->getOperand(i));
12661
12662 if (N->getOpcode() == ISD::TRUNCATE)
12663 break;
12664 }
12665
12666 // Visit all inputs, collect all binary operations (and, or, xor and
12667 // select) that are all fed by extensions.
12668 while (!BinOps.empty()) {
12669 SDValue BinOp = BinOps.pop_back_val();
12670
12671 if (!Visited.insert(BinOp.getNode()).second)
12672 continue;
12673
12674 PromOps.push_back(BinOp);
12675
12676 for (unsigned i = 0, ie = BinOp.getNumOperands(); i != ie; ++i) {
12677 // The condition of the select is not promoted.
12678 if (BinOp.getOpcode() == ISD::SELECT && i == 0)
12679 continue;
12680 if (BinOp.getOpcode() == ISD::SELECT_CC && i != 2 && i != 3)
12681 continue;
12682
12683 if (((BinOp.getOperand(i).getOpcode() == ISD::SIGN_EXTEND ||
12684 BinOp.getOperand(i).getOpcode() == ISD::ZERO_EXTEND ||
12685 BinOp.getOperand(i).getOpcode() == ISD::ANY_EXTEND) &&
12686 BinOp.getOperand(i).getOperand(0).getValueType() == MVT::i1) ||
12687 isa<ConstantSDNode>(BinOp.getOperand(i))) {
12688 Inputs.push_back(BinOp.getOperand(i));
12689 } else if (BinOp.getOperand(i).getOpcode() == ISD::AND ||
12690 BinOp.getOperand(i).getOpcode() == ISD::OR ||
12691 BinOp.getOperand(i).getOpcode() == ISD::XOR ||
12692 BinOp.getOperand(i).getOpcode() == ISD::SELECT ||
12693 BinOp.getOperand(i).getOpcode() == ISD::SELECT_CC ||
12694 BinOp.getOperand(i).getOpcode() == ISD::TRUNCATE ||
12695 BinOp.getOperand(i).getOpcode() == ISD::SIGN_EXTEND ||
12696 BinOp.getOperand(i).getOpcode() == ISD::ZERO_EXTEND ||
12697 BinOp.getOperand(i).getOpcode() == ISD::ANY_EXTEND) {
12698 BinOps.push_back(BinOp.getOperand(i));
12699 } else {
12700 // We have an input that is not an extension or another binary
12701 // operation; we'll abort this transformation.
12702 return SDValue();
12703 }
12704 }
12705 }
12706
12707 // Make sure that this is a self-contained cluster of operations (which
12708 // is not quite the same thing as saying that everything has only one
12709 // use).
12710 for (unsigned i = 0, ie = Inputs.size(); i != ie; ++i) {
12711 if (isa<ConstantSDNode>(Inputs[i]))
12712 continue;
12713
12714 for (SDNode::use_iterator UI = Inputs[i].getNode()->use_begin(),
12715 UE = Inputs[i].getNode()->use_end();
12716 UI != UE; ++UI) {
12717 SDNode *User = *UI;
12718 if (User != N && !Visited.count(User))
12719 return SDValue();
12720
12721 // Make sure that we're not going to promote the non-output-value
12722 // operand(s) or SELECT or SELECT_CC.
12723 // FIXME: Although we could sometimes handle this, and it does occur in
12724 // practice that one of the condition inputs to the select is also one of
12725 // the outputs, we currently can't deal with this.
12726 if (User->getOpcode() == ISD::SELECT) {
12727 if (User->getOperand(0) == Inputs[i])
12728 return SDValue();
12729 } else if (User->getOpcode() == ISD::SELECT_CC) {
12730 if (User->getOperand(0) == Inputs[i] ||
12731 User->getOperand(1) == Inputs[i])
12732 return SDValue();
12733 }
12734 }
12735 }
12736
12737 for (unsigned i = 0, ie = PromOps.size(); i != ie; ++i) {
12738 for (SDNode::use_iterator UI = PromOps[i].getNode()->use_begin(),
12739 UE = PromOps[i].getNode()->use_end();
12740 UI != UE; ++UI) {
12741 SDNode *User = *UI;
12742 if (User != N && !Visited.count(User))
12743 return SDValue();
12744
12745 // Make sure that we're not going to promote the non-output-value
12746 // operand(s) or SELECT or SELECT_CC.
12747 // FIXME: Although we could sometimes handle this, and it does occur in
12748 // practice that one of the condition inputs to the select is also one of
12749 // the outputs, we currently can't deal with this.
12750 if (User->getOpcode() == ISD::SELECT) {
12751 if (User->getOperand(0) == PromOps[i])
12752 return SDValue();
12753 } else if (User->getOpcode() == ISD::SELECT_CC) {
12754 if (User->getOperand(0) == PromOps[i] ||
12755 User->getOperand(1) == PromOps[i])
12756 return SDValue();
12757 }
12758 }
12759 }
12760
12761 // Replace all inputs with the extension operand.
12762 for (unsigned i = 0, ie = Inputs.size(); i != ie; ++i) {
12763 // Constants may have users outside the cluster of to-be-promoted nodes,
12764 // and so we need to replace those as we do the promotions.
12765 if (isa<ConstantSDNode>(Inputs[i]))
12766 continue;
12767 else
12768 DAG.ReplaceAllUsesOfValueWith(Inputs[i], Inputs[i].getOperand(0));
12769 }
12770
12771 std::list<HandleSDNode> PromOpHandles;
12772 for (auto &PromOp : PromOps)
12773 PromOpHandles.emplace_back(PromOp);
12774
12775 // Replace all operations (these are all the same, but have a different
12776 // (i1) return type). DAG.getNode will validate that the types of
12777 // a binary operator match, so go through the list in reverse so that
12778 // we've likely promoted both operands first. Any intermediate truncations or
12779 // extensions disappear.
12780 while (!PromOpHandles.empty()) {
12781 SDValue PromOp = PromOpHandles.back().getValue();
12782 PromOpHandles.pop_back();
12783
12784 if (PromOp.getOpcode() == ISD::TRUNCATE ||
12785 PromOp.getOpcode() == ISD::SIGN_EXTEND ||
12786 PromOp.getOpcode() == ISD::ZERO_EXTEND ||
12787 PromOp.getOpcode() == ISD::ANY_EXTEND) {
12788 if (!isa<ConstantSDNode>(PromOp.getOperand(0)) &&
12789 PromOp.getOperand(0).getValueType() != MVT::i1) {
12790 // The operand is not yet ready (see comment below).
12791 PromOpHandles.emplace_front(PromOp);
12792 continue;
12793 }
12794
12795 SDValue RepValue = PromOp.getOperand(0);
12796 if (isa<ConstantSDNode>(RepValue))
12797 RepValue = DAG.getNode(ISD::TRUNCATE, dl, MVT::i1, RepValue);
12798
12799 DAG.ReplaceAllUsesOfValueWith(PromOp, RepValue);
12800 continue;
12801 }
12802
12803 unsigned C;
12804 switch (PromOp.getOpcode()) {
12805 default: C = 0; break;
12806 case ISD::SELECT: C = 1; break;
12807 case ISD::SELECT_CC: C = 2; break;
12808 }
12809
12810 if ((!isa<ConstantSDNode>(PromOp.getOperand(C)) &&
12811 PromOp.getOperand(C).getValueType() != MVT::i1) ||
12812 (!isa<ConstantSDNode>(PromOp.getOperand(C+1)) &&
12813 PromOp.getOperand(C+1).getValueType() != MVT::i1)) {
12814 // The to-be-promoted operands of this node have not yet been
12815 // promoted (this should be rare because we're going through the
12816 // list backward, but if one of the operands has several users in
12817 // this cluster of to-be-promoted nodes, it is possible).
12818 PromOpHandles.emplace_front(PromOp);
12819 continue;
12820 }
12821
12822 SmallVector<SDValue, 2> Ops(PromOp.getNode()->op_begin(),
12823 PromOp.getNode()->op_end());
12824
12825 // If there are any constant inputs, make sure they're replaced now.
12826 for (unsigned i = 0; i < 2; ++i)
12827 if (isa<ConstantSDNode>(Ops[C+i]))
12828 Ops[C+i] = DAG.getNode(ISD::TRUNCATE, dl, MVT::i1, Ops[C+i]);
12829
12830 DAG.ReplaceAllUsesOfValueWith(PromOp,
12831 DAG.getNode(PromOp.getOpcode(), dl, MVT::i1, Ops));
12832 }
12833
12834 // Now we're left with the initial truncation itself.
12835 if (N->getOpcode() == ISD::TRUNCATE)
12836 return N->getOperand(0);
12837
12838 // Otherwise, this is a comparison. The operands to be compared have just
12839 // changed type (to i1), but everything else is the same.
12840 return SDValue(N, 0);
12841}
12842
12843SDValue PPCTargetLowering::DAGCombineExtBoolTrunc(SDNode *N,
12844 DAGCombinerInfo &DCI) const {
12845 SelectionDAG &DAG = DCI.DAG;
12846 SDLoc dl(N);
12847
12848 // If we're tracking CR bits, we need to be careful that we don't have:
12849 // zext(binary-ops(trunc(x), trunc(y)))
12850 // or
12851 // zext(binary-ops(binary-ops(trunc(x), trunc(y)), ...))
12852 // such that we're unnecessarily moving things into CR bits that can more
12853 // efficiently stay in GPRs. Note that if we're not certain that the high
12854 // bits are set as required by the final extension, we still may need to do
12855 // some masking to get the proper behavior.
12856
12857 // This same functionality is important on PPC64 when dealing with
12858 // 32-to-64-bit extensions; these occur often when 32-bit values are used as
12859 // the return values of functions. Because it is so similar, it is handled
12860 // here as well.
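// For example, on PPC64 a pattern like
//   zext (or (trunc x:i64), (trunc y:i64)) -> i64
// can perform the OR on the original i64 values and drop the truncations,
// provided the high bits are known to satisfy the extension (otherwise a
// mask is still needed, as noted above).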
12861
12862 if (N->getValueType(0) != MVT::i32 &&
12863 N->getValueType(0) != MVT::i64)
12864 return SDValue();
12865
12866 if (!((N->getOperand(0).getValueType() == MVT::i1 && Subtarget.useCRBits()) ||
12867 (N->getOperand(0).getValueType() == MVT::i32 && Subtarget.isPPC64())))
12868 return SDValue();
12869
12870 if (N->getOperand(0).getOpcode() != ISD::AND &&
12871 N->getOperand(0).getOpcode() != ISD::OR &&
12872 N->getOperand(0).getOpcode() != ISD::XOR &&
12873 N->getOperand(0).getOpcode() != ISD::SELECT &&
12874 N->getOperand(0).getOpcode() != ISD::SELECT_CC)
12875 return SDValue();
12876
12877 SmallVector<SDValue, 4> Inputs;
12878 SmallVector<SDValue, 8> BinOps(1, N->getOperand(0)), PromOps;
12879 SmallPtrSet<SDNode *, 16> Visited;
12880
12881 // Visit all inputs, collect all binary operations (and, or, xor and
12882 // select) that are all fed by truncations.
12883 while (!BinOps.empty()) {
12884 SDValue BinOp = BinOps.pop_back_val();
12885
12886 if (!Visited.insert(BinOp.getNode()).second)
12887 continue;
12888
12889 PromOps.push_back(BinOp);
12890
12891 for (unsigned i = 0, ie = BinOp.getNumOperands(); i != ie; ++i) {
12892 // The condition of the select is not promoted.
12893 if (BinOp.getOpcode() == ISD::SELECT && i == 0)
12894 continue;
12895 if (BinOp.getOpcode() == ISD::SELECT_CC && i != 2 && i != 3)
12896 continue;
12897
12898 if (BinOp.getOperand(i).getOpcode() == ISD::TRUNCATE ||
12899 isa<ConstantSDNode>(BinOp.getOperand(i))) {
12900 Inputs.push_back(BinOp.getOperand(i));
12901 } else if (BinOp.getOperand(i).getOpcode() == ISD::AND ||
12902 BinOp.getOperand(i).getOpcode() == ISD::OR ||
12903 BinOp.getOperand(i).getOpcode() == ISD::XOR ||
12904 BinOp.getOperand(i).getOpcode() == ISD::SELECT ||
12905 BinOp.getOperand(i).getOpcode() == ISD::SELECT_CC) {
12906 BinOps.push_back(BinOp.getOperand(i));
12907 } else {
12908 // We have an input that is not a truncation or another binary
12909 // operation; we'll abort this transformation.
12910 return SDValue();
12911 }
12912 }
12913 }
12914
12915 // The operands of a select that must be truncated when the select is
12916 // promoted because the operand is actually part of the to-be-promoted set.
12917 DenseMap<SDNode *, EVT> SelectTruncOp[2];
12918
12919 // Make sure that this is a self-contained cluster of operations (which
12920 // is not quite the same thing as saying that everything has only one
12921 // use).
12922 for (unsigned i = 0, ie = Inputs.size(); i != ie; ++i) {
12923 if (isa<ConstantSDNode>(Inputs[i]))
12924 continue;
12925
12926 for (SDNode::use_iterator UI = Inputs[i].getNode()->use_begin(),
12927 UE = Inputs[i].getNode()->use_end();
12928 UI != UE; ++UI) {
12929 SDNode *User = *UI;
12930 if (User != N && !Visited.count(User))
12931 return SDValue();
12932
12933 // If we're going to promote the non-output-value operand(s) of SELECT or
12934 // SELECT_CC, record them for truncation.
12935 if (User->getOpcode() == ISD::SELECT) {
12936 if (User->getOperand(0) == Inputs[i])
12937 SelectTruncOp[0].insert(std::make_pair(User,
12938 User->getOperand(0).getValueType()));
12939 } else if (User->getOpcode() == ISD::SELECT_CC) {
12940 if (User->getOperand(0) == Inputs[i])
12941 SelectTruncOp[0].insert(std::make_pair(User,
12942 User->getOperand(0).getValueType()));
12943 if (User->getOperand(1) == Inputs[i])
12944 SelectTruncOp[1].insert(std::make_pair(User,
12945 User->getOperand(1).getValueType()));
12946 }
12947 }
12948 }
12949
12950 for (unsigned i = 0, ie = PromOps.size(); i != ie; ++i) {
12951 for (SDNode::use_iterator UI = PromOps[i].getNode()->use_begin(),
12952 UE = PromOps[i].getNode()->use_end();
12953 UI != UE; ++UI) {
12954 SDNode *User = *UI;
12955 if (User != N && !Visited.count(User))
12956 return SDValue();
12957
12958 // If we're going to promote the non-output-value operand(s) of SELECT or
12959 // SELECT_CC, record them for truncation.
12960 if (User->getOpcode() == ISD::SELECT) {
12961 if (User->getOperand(0) == PromOps[i])
12962 SelectTruncOp[0].insert(std::make_pair(User,
12963 User->getOperand(0).getValueType()));
12964 } else if (User->getOpcode() == ISD::SELECT_CC) {
12965 if (User->getOperand(0) == PromOps[i])
12966 SelectTruncOp[0].insert(std::make_pair(User,
12967 User->getOperand(0).getValueType()));
12968 if (User->getOperand(1) == PromOps[i])
12969 SelectTruncOp[1].insert(std::make_pair(User,
12970 User->getOperand(1).getValueType()));
12971 }
12972 }
12973 }
12974
12975 unsigned PromBits = N->getOperand(0).getValueSizeInBits();
12976 bool ReallyNeedsExt = false;
12977 if (N->getOpcode() != ISD::ANY_EXTEND) {
12978 // If the inputs are not all already sign/zero-extended, then we'll
12979 // still need to do that at the end.
12980 for (unsigned i = 0, ie = Inputs.size(); i != ie; ++i) {
12981 if (isa<ConstantSDNode>(Inputs[i]))
12982 continue;
12983
12984 unsigned OpBits =
12985 Inputs[i].getOperand(0).getValueSizeInBits();
12986 assert(PromBits < OpBits && "Truncation not to a smaller bit count?");
12987
12988 if ((N->getOpcode() == ISD::ZERO_EXTEND &&
12989 !DAG.MaskedValueIsZero(Inputs[i].getOperand(0),
12990 APInt::getHighBitsSet(OpBits,
12991 OpBits-PromBits))) ||
12992 (N->getOpcode() == ISD::SIGN_EXTEND &&
12993 DAG.ComputeNumSignBits(Inputs[i].getOperand(0)) <
12994 (OpBits-(PromBits-1)))) {
12995 ReallyNeedsExt = true;
12996 break;
12997 }
12998 }
12999 }
13000
13001 // Replace all inputs, either with the truncation operand, or a
13002 // truncation or extension to the final output type.
13003 for (unsigned i = 0, ie = Inputs.size(); i != ie; ++i) {
13004 // Constant inputs need to be replaced with the to-be-promoted nodes that
13005 // use them because they might have users outside of the cluster of
13006 // promoted nodes.
13007 if (isa<ConstantSDNode>(Inputs[i]))
13008 continue;
13009
13010 SDValue InSrc = Inputs[i].getOperand(0);
13011 if (Inputs[i].getValueType() == N->getValueType(0))
13012 DAG.ReplaceAllUsesOfValueWith(Inputs[i], InSrc);
13013 else if (N->getOpcode() == ISD::SIGN_EXTEND)
13014 DAG.ReplaceAllUsesOfValueWith(Inputs[i],
13015 DAG.getSExtOrTrunc(InSrc, dl, N->getValueType(0)));
13016 else if (N->getOpcode() == ISD::ZERO_EXTEND)
13017 DAG.ReplaceAllUsesOfValueWith(Inputs[i],
13018 DAG.getZExtOrTrunc(InSrc, dl, N->getValueType(0)));
13019 else
13020 DAG.ReplaceAllUsesOfValueWith(Inputs[i],
13021 DAG.getAnyExtOrTrunc(InSrc, dl, N->getValueType(0)));
13022 }
13023
13024 std::list<HandleSDNode> PromOpHandles;
13025 for (auto &PromOp : PromOps)
13026 PromOpHandles.emplace_back(PromOp);
13027
13028 // Replace all operations (these are all the same, but have a different
13029 // (promoted) return type). DAG.getNode will validate that the types of
13030 // a binary operator match, so go through the list in reverse so that
13031 // we've likely promoted both operands first.
13032 while (!PromOpHandles.empty()) {
13033 SDValue PromOp = PromOpHandles.back().getValue();
13034 PromOpHandles.pop_back();
13035
13036 unsigned C;
13037 switch (PromOp.getOpcode()) {
13038 default: C = 0; break;
13039 case ISD::SELECT: C = 1; break;
13040 case ISD::SELECT_CC: C = 2; break;
13041 }
13042
13043 if ((!isa<ConstantSDNode>(PromOp.getOperand(C)) &&
13044 PromOp.getOperand(C).getValueType() != N->getValueType(0)) ||
13045 (!isa<ConstantSDNode>(PromOp.getOperand(C+1)) &&
13046 PromOp.getOperand(C+1).getValueType() != N->getValueType(0))) {
13047 // The to-be-promoted operands of this node have not yet been
13048 // promoted (this should be rare because we're going through the
13049 // list backward, but if one of the operands has several users in
13050 // this cluster of to-be-promoted nodes, it is possible).
13051 PromOpHandles.emplace_front(PromOp);
13052 continue;
13053 }
13054
13055 // For SELECT and SELECT_CC nodes, we do a similar check for any
13056 // to-be-promoted comparison inputs.
13057 if (PromOp.getOpcode() == ISD::SELECT ||
13058 PromOp.getOpcode() == ISD::SELECT_CC) {
13059 if ((SelectTruncOp[0].count(PromOp.getNode()) &&
13060 PromOp.getOperand(0).getValueType() != N->getValueType(0)) ||
13061 (SelectTruncOp[1].count(PromOp.getNode()) &&
13062 PromOp.getOperand(1).getValueType() != N->getValueType(0))) {
13063 PromOpHandles.emplace_front(PromOp);
13064 continue;
13065 }
13066 }
13067
13068 SmallVector<SDValue, 3> Ops(PromOp.getNode()->op_begin(),
13069 PromOp.getNode()->op_end());
13070
13071 // If this node has constant inputs, then they'll need to be promoted here.
13072 for (unsigned i = 0; i < 2; ++i) {
13073 if (!isa<ConstantSDNode>(Ops[C+i]))
13074 continue;
13075 if (Ops[C+i].getValueType() == N->getValueType(0))
13076 continue;
13077
13078 if (N->getOpcode() == ISD::SIGN_EXTEND)
13079 Ops[C+i] = DAG.getSExtOrTrunc(Ops[C+i], dl, N->getValueType(0));
13080 else if (N->getOpcode() == ISD::ZERO_EXTEND)
13081 Ops[C+i] = DAG.getZExtOrTrunc(Ops[C+i], dl, N->getValueType(0));
13082 else
13083 Ops[C+i] = DAG.getAnyExtOrTrunc(Ops[C+i], dl, N->getValueType(0));
13084 }
13085
13086 // If we've promoted the comparison inputs of a SELECT or SELECT_CC,
13087 // truncate them again to the original value type.
13088 if (PromOp.getOpcode() == ISD::SELECT ||
13089 PromOp.getOpcode() == ISD::SELECT_CC) {
13090 auto SI0 = SelectTruncOp[0].find(PromOp.getNode());
13091 if (SI0 != SelectTruncOp[0].end())
13092 Ops[0] = DAG.getNode(ISD::TRUNCATE, dl, SI0->second, Ops[0]);
13093 auto SI1 = SelectTruncOp[1].find(PromOp.getNode());
13094 if (SI1 != SelectTruncOp[1].end())
13095 Ops[1] = DAG.getNode(ISD::TRUNCATE, dl, SI1->second, Ops[1]);
13096 }
13097
13098 DAG.ReplaceAllUsesOfValueWith(PromOp,
13099 DAG.getNode(PromOp.getOpcode(), dl, N->getValueType(0), Ops));
13100 }
13101
13102 // Now we're left with the initial extension itself.
13103 if (!ReallyNeedsExt)
13104 return N->getOperand(0);
13105
13106 // To zero extend, just mask off everything except for the first bit (in the
13107 // i1 case).
13108 if (N->getOpcode() == ISD::ZERO_EXTEND)
13109 return DAG.getNode(ISD::AND, dl, N->getValueType(0), N->getOperand(0),
13110 DAG.getConstant(APInt::getLowBitsSet(
13111 N->getValueSizeInBits(0), PromBits),
13112 dl, N->getValueType(0)));
13113
13114 assert(N->getOpcode() == ISD::SIGN_EXTEND &&
13115 "Invalid extension type");
13116 EVT ShiftAmountTy = getShiftAmountTy(N->getValueType(0), DAG.getDataLayout());
13117 SDValue ShiftCst =
13118 DAG.getConstant(N->getValueSizeInBits(0) - PromBits, dl, ShiftAmountTy);
13119 return DAG.getNode(
13120 ISD::SRA, dl, N->getValueType(0),
13121 DAG.getNode(ISD::SHL, dl, N->getValueType(0), N->getOperand(0), ShiftCst),
13122 ShiftCst);
13123}
13124
13125SDValue PPCTargetLowering::combineSetCC(SDNode *N,
13126 DAGCombinerInfo &DCI) const {
13127 assert(N->getOpcode() == ISD::SETCC &&
13128 "Should be called with a SETCC node");
13129
13130 ISD::CondCode CC = cast<CondCodeSDNode>(N->getOperand(2))->get();
13131 if (CC == ISD::SETNE || CC == ISD::SETEQ) {
13132 SDValue LHS = N->getOperand(0);
13133 SDValue RHS = N->getOperand(1);
13134
13135 // If there is a '0 - y' pattern, canonicalize the pattern to the RHS.
13136 if (LHS.getOpcode() == ISD::SUB && isNullConstant(LHS.getOperand(0)) &&
13137 LHS.hasOneUse())
13138 std::swap(LHS, RHS);
13139
13140 // x == 0-y --> x+y == 0
13141 // x != 0-y --> x+y != 0
13142 if (RHS.getOpcode() == ISD::SUB && isNullConstant(RHS.getOperand(0)) &&
13143 RHS.hasOneUse()) {
13144 SDLoc DL(N);
13145 SelectionDAG &DAG = DCI.DAG;
13146 EVT VT = N->getValueType(0);
13147 EVT OpVT = LHS.getValueType();
13148 SDValue Add = DAG.getNode(ISD::ADD, DL, OpVT, LHS, RHS.getOperand(1));
13149 return DAG.getSetCC(DL, VT, Add, DAG.getConstant(0, DL, OpVT), CC);
13150 }
13151 }
13152
13153 return DAGCombineTruncBoolExt(N, DCI);
13154}
13155
13156// Is this an extending load from an f32 to an f64?
13157static bool isFPExtLoad(SDValue Op) {
13158 if (LoadSDNode *LD = dyn_cast<LoadSDNode>(Op.getNode()))
13159 return LD->getExtensionType() == ISD::EXTLOAD &&
13160 Op.getValueType() == MVT::f64;
13161 return false;
13162}
13163
13164 /// Reduces the number of fp-to-int conversions when building a vector.
13165///
13166/// If this vector is built out of floating to integer conversions,
13167/// transform it to a vector built out of floating point values followed by a
13168/// single floating to integer conversion of the vector.
13169/// Namely (build_vector (fptosi $A), (fptosi $B), ...)
13170/// becomes (fptosi (build_vector ($A, $B, ...)))
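/// For example (an illustrative sketch, assuming VSX and a v2i64 result):
///   (v2i64 (build_vector (fptosi f64 %A), (fptosi f64 %B)))
/// becomes
///   (v2i64 (fptosi (v2f64 (build_vector %A, %B))))
/// so a single vector conversion (e.g. xvcvdpsxds) can replace two scalar
/// conversions plus the moves needed to assemble the vector.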
13171SDValue PPCTargetLowering::
13172combineElementTruncationToVectorTruncation(SDNode *N,
13173 DAGCombinerInfo &DCI) const {
13174 assert(N->getOpcode() == ISD::BUILD_VECTOR &&
13175 "Should be called with a BUILD_VECTOR node");
13176
13177 SelectionDAG &DAG = DCI.DAG;
13178 SDLoc dl(N);
13179
13180 SDValue FirstInput = N->getOperand(0);
13181 assert(FirstInput.getOpcode() == PPCISD::MFVSR &&
13182 "The input operand must be an fp-to-int conversion.");
13183
13184 // This combine happens after legalization so the fp_to_[su]i nodes are
13185 // already converted to PPCISD nodes.
13186 unsigned FirstConversion = FirstInput.getOperand(0).getOpcode();
13187 if (FirstConversion == PPCISD::FCTIDZ ||
13188 FirstConversion == PPCISD::FCTIDUZ ||
13189 FirstConversion == PPCISD::FCTIWZ ||
13190 FirstConversion == PPCISD::FCTIWUZ) {
13191 bool IsSplat = true;
13192 bool Is32Bit = FirstConversion == PPCISD::FCTIWZ ||
13193 FirstConversion == PPCISD::FCTIWUZ;
13194 EVT SrcVT = FirstInput.getOperand(0).getValueType();
13195 SmallVector<SDValue, 4> Ops;
13196 EVT TargetVT = N->getValueType(0);
13197 for (int i = 0, e = N->getNumOperands(); i < e; ++i) {
13198 SDValue NextOp = N->getOperand(i);
13199 if (NextOp.getOpcode() != PPCISD::MFVSR)
13200 return SDValue();
13201 unsigned NextConversion = NextOp.getOperand(0).getOpcode();
13202 if (NextConversion != FirstConversion)
13203 return SDValue();
13204 // If we are converting to 32-bit integers, we need to add an FP_ROUND.
13205 // This is not valid if the input was originally double precision. It is
13206 // also not profitable to do unless this is an extending load in which
13207 // case doing this combine will allow us to combine consecutive loads.
13208 if (Is32Bit && !isFPExtLoad(NextOp.getOperand(0).getOperand(0)))
13209 return SDValue();
13210 if (N->getOperand(i) != FirstInput)
13211 IsSplat = false;
13212 }
13213
13214 // If this is a splat, we leave it as-is since there will be only a single
13215 // fp-to-int conversion followed by a splat of the integer. This is better
13216 // for 32-bit and smaller ints and neutral for 64-bit ints.
13217 if (IsSplat)
13218 return SDValue();
13219
13220 // Now that we know we have the right type of node, get its operands
13221 for (int i = 0, e = N->getNumOperands(); i < e; ++i) {
13222 SDValue In = N->getOperand(i).getOperand(0);
13223 if (Is32Bit) {
13224 // For 32-bit values, we need to add an FP_ROUND node (if we made it
13225 // here, we know that all inputs are extending loads so this is safe).
13226 if (In.isUndef())
13227 Ops.push_back(DAG.getUNDEF(SrcVT));
13228 else {
13229 SDValue Trunc = DAG.getNode(ISD::FP_ROUND, dl,
13230 MVT::f32, In.getOperand(0),
13231 DAG.getIntPtrConstant(1, dl));
13232 Ops.push_back(Trunc);
13233 }
13234 } else
13235 Ops.push_back(In.isUndef() ? DAG.getUNDEF(SrcVT) : In.getOperand(0));
13236 }
13237
13238 unsigned Opcode;
13239 if (FirstConversion == PPCISD::FCTIDZ ||
13240 FirstConversion == PPCISD::FCTIWZ)
13241 Opcode = ISD::FP_TO_SINT;
13242 else
13243 Opcode = ISD::FP_TO_UINT;
13244
13245 EVT NewVT = TargetVT == MVT::v2i64 ? MVT::v2f64 : MVT::v4f32;
13246 SDValue BV = DAG.getBuildVector(NewVT, dl, Ops);
13247 return DAG.getNode(Opcode, dl, TargetVT, BV);
13248 }
13249 return SDValue();
13250}
13251
13252/// Reduce the number of loads when building a vector.
13253///
13254/// Building a vector out of multiple loads can be converted to a load
13255/// of the vector type if the loads are consecutive. If the loads are
13256/// consecutive but in descending order, a shuffle is added at the end
13257/// to reorder the vector.
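/// For example (a sketch): a v4i32 (build_vector (load p), (load p+4),
/// (load p+8), (load p+12)) becomes a single (load v4i32, p); if the
/// operands instead load from p+12 down to p, the wide load is followed by
/// a vector_shuffle<3,2,1,0> that reverses the element order.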
13258 static SDValue combineBVOfConsecutiveLoads(SDNode *N, SelectionDAG &DAG) {
13259 assert(N->getOpcode() == ISD::BUILD_VECTOR &&
13260 "Should be called with a BUILD_VECTOR node");
13261
13262 SDLoc dl(N);
13263
13264 // Return early for non-byte-sized types, as they can't be consecutive.
13265 if (!N->getValueType(0).getVectorElementType().isByteSized())
13266 return SDValue();
13267
13268 bool InputsAreConsecutiveLoads = true;
13269 bool InputsAreReverseConsecutive = true;
13270 unsigned ElemSize = N->getValueType(0).getScalarType().getStoreSize();
13271 SDValue FirstInput = N->getOperand(0);
13272 bool IsRoundOfExtLoad = false;
13273
13274 if (FirstInput.getOpcode() == ISD::FP_ROUND &&
13275 FirstInput.getOperand(0).getOpcode() == ISD::LOAD) {
13276 LoadSDNode *LD = dyn_cast<LoadSDNode>(FirstInput.getOperand(0));
13277 IsRoundOfExtLoad = LD->getExtensionType() == ISD::EXTLOAD;
13278 }
13279 // Not a build vector of (possibly fp_rounded) loads.
13280 if ((!IsRoundOfExtLoad && FirstInput.getOpcode() != ISD::LOAD) ||
13281 N->getNumOperands() == 1)
13282 return SDValue();
13283
13284 for (int i = 1, e = N->getNumOperands(); i < e; ++i) {
13285 // If any inputs are fp_round(extload), they all must be.
13286 if (IsRoundOfExtLoad && N->getOperand(i).getOpcode() != ISD::FP_ROUND)
13287 return SDValue();
13288
13289 SDValue NextInput = IsRoundOfExtLoad ? N->getOperand(i).getOperand(0) :
13290 N->getOperand(i);
13291 if (NextInput.getOpcode() != ISD::LOAD)
13292 return SDValue();
13293
13294 SDValue PreviousInput =
13295 IsRoundOfExtLoad ? N->getOperand(i-1).getOperand(0) : N->getOperand(i-1);
13296 LoadSDNode *LD1 = dyn_cast<LoadSDNode>(PreviousInput);
13297 LoadSDNode *LD2 = dyn_cast<LoadSDNode>(NextInput);
13298
13299 // If any inputs are fp_round(extload), they all must be.
13300 if (IsRoundOfExtLoad && LD2->getExtensionType() != ISD::EXTLOAD)
13301 return SDValue();
13302
13303 if (!isConsecutiveLS(LD2, LD1, ElemSize, 1, DAG))
13304 InputsAreConsecutiveLoads = false;
13305 if (!isConsecutiveLS(LD1, LD2, ElemSize, 1, DAG))
13306 InputsAreReverseConsecutive = false;
13307
13308 // Exit early if the loads are neither consecutive nor reverse consecutive.
13309 if (!InputsAreConsecutiveLoads && !InputsAreReverseConsecutive)
13310 return SDValue();
13311 }
13312
13313 assert(!(InputsAreConsecutiveLoads && InputsAreReverseConsecutive) &&
13314 "The loads cannot be both consecutive and reverse consecutive.");
13315
13316 SDValue FirstLoadOp =
13317 IsRoundOfExtLoad ? FirstInput.getOperand(0) : FirstInput;
13318 SDValue LastLoadOp =
13319 IsRoundOfExtLoad ? N->getOperand(N->getNumOperands()-1).getOperand(0) :
13320 N->getOperand(N->getNumOperands()-1);
13321
13322 LoadSDNode *LD1 = dyn_cast<LoadSDNode>(FirstLoadOp);
13323 LoadSDNode *LDL = dyn_cast<LoadSDNode>(LastLoadOp);
13324 if (InputsAreConsecutiveLoads) {
13325 assert(LD1 && "Input needs to be a LoadSDNode.");
13326 return DAG.getLoad(N->getValueType(0), dl, LD1->getChain(),
13327 LD1->getBasePtr(), LD1->getPointerInfo(),
13328 LD1->getAlignment());
13329 }
13330 if (InputsAreReverseConsecutive) {
13331 assert(LDL && "Input needs to be a LoadSDNode.");
13332 SDValue Load = DAG.getLoad(N->getValueType(0), dl, LDL->getChain(),
13333 LDL->getBasePtr(), LDL->getPointerInfo(),
13334 LDL->getAlignment());
13335 SmallVector<int, 16> Ops;
13336 for (int i = N->getNumOperands() - 1; i >= 0; i--)
13337 Ops.push_back(i);
13338
13339 return DAG.getVectorShuffle(N->getValueType(0), dl, Load,
13340 DAG.getUNDEF(N->getValueType(0)), Ops);
13341 }
13342 return SDValue();
13343}
13344
13345// This function adds the required vector_shuffle needed to get
13346// the elements of the vector extract in the correct position
13347// as specified by the CorrectElems encoding.
13348 static SDValue addShuffleForVecExtend(SDNode *N, SelectionDAG &DAG,
13349 SDValue Input, uint64_t Elems,
13350 uint64_t CorrectElems) {
13351 SDLoc dl(N);
13352
13353 unsigned NumElems = Input.getValueType().getVectorNumElements();
13354 SmallVector<int, 16> ShuffleMask(NumElems, -1);
13355
13356 // Knowing the element indices being extracted from the original
13357 // vector and the order in which they're being inserted, just put
13358 // them at element indices required for the instruction.
13359 for (unsigned i = 0; i < N->getNumOperands(); i++) {
13360 if (DAG.getDataLayout().isLittleEndian())
13361 ShuffleMask[CorrectElems & 0xF] = Elems & 0xF;
13362 else
13363 ShuffleMask[(CorrectElems & 0xF0) >> 4] = (Elems & 0xF0) >> 4;
13364 CorrectElems = CorrectElems >> 8;
13365 Elems = Elems >> 8;
13366 }
13367
13368 SDValue Shuffle =
13369 DAG.getVectorShuffle(Input.getValueType(), dl, Input,
13370 DAG.getUNDEF(Input.getValueType()), ShuffleMask);
13371
13372 EVT VT = N->getValueType(0);
13373 SDValue Conv = DAG.getBitcast(VT, Shuffle);
13374
13375 EVT ExtVT = EVT::getVectorVT(*DAG.getContext(),
13376 Input.getValueType().getVectorElementType(),
13377 N->getNumOperands());
13378 return DAG.getNode(ISD::SIGN_EXTEND_INREG, dl, VT, Conv,
13379 DAG.getValueType(ExtVT));
13380}
13381
13382// Look for build vector patterns where input operands come from sign
13383// extended vector_extract elements of specific indices. If the correct indices
13384 // aren't used, add a vector shuffle to fix up the indices and create a
13385 // SIGN_EXTEND_INREG node, which selects the vector sign extend instructions
13386// during instruction selection.
13387 static SDValue combineBVOfVecSExt(SDNode *N, SelectionDAG &DAG) {
13388 // This array encodes the indices that the vector sign extend instructions
13389 // extract from when extending from one type to another for both BE and LE.
13390 // The right nibble of each byte corresponds to the LE indices,
13391 // and the left nibble of each byte corresponds to the BE indices.
13392 // For example: 0x3074B8FC byte->word
13393 // For LE: the allowed indices are: 0x0,0x4,0x8,0xC
13394 // For BE: the allowed indices are: 0x3,0x7,0xB,0xF
13395 // For example: 0x000070F8 byte->double word
13396 // For LE: the allowed indices are: 0x0,0x8
13397 // For BE: the allowed indices are: 0x7,0xF
13398 uint64_t TargetElems[] = {
13399 0x3074B8FC, // b->w
13400 0x000070F8, // b->d
13401 0x10325476, // h->w
13402 0x00003074, // h->d
13403 0x00001032, // w->d
13404 };
13405
13406 uint64_t Elems = 0;
13407 int Index;
13408 SDValue Input;
13409
13410 auto isSExtOfVecExtract = [&](SDValue Op) -> bool {
13411 if (!Op)
13412 return false;
13413 if (Op.getOpcode() != ISD::SIGN_EXTEND &&
13414 Op.getOpcode() != ISD::SIGN_EXTEND_INREG)
13415 return false;
13416
13417 // A SIGN_EXTEND_INREG might be fed by an ANY_EXTEND to produce a value
13418 // of the right width.
13419 SDValue Extract = Op.getOperand(0);
13420 if (Extract.getOpcode() == ISD::ANY_EXTEND)
13421 Extract = Extract.getOperand(0);
13422 if (Extract.getOpcode() != ISD::EXTRACT_VECTOR_ELT)
13423 return false;
13424
13425 ConstantSDNode *ExtOp = dyn_cast<ConstantSDNode>(Extract.getOperand(1));
13426 if (!ExtOp)
13427 return false;
13428
13429 Index = ExtOp->getZExtValue();
13430 if (Input && Input != Extract.getOperand(0))
13431 return false;
13432
13433 if (!Input)
13434 Input = Extract.getOperand(0);
13435
13436 Elems = Elems << 8;
13437 Index = DAG.getDataLayout().isLittleEndian() ? Index : Index << 4;
13438 Elems |= Index;
13439
13440 return true;
13441 };
13442
13443 // If the build vector operands aren't sign-extended vector extracts
13444 // of the same input vector, then return.
13445 for (unsigned i = 0; i < N->getNumOperands(); i++) {
13446 if (!isSExtOfVecExtract(N->getOperand(i))) {
13447 return SDValue();
13448 }
13449 }
13450
13451 // If the vector extract indices are not correct, add the appropriate
13452 // vector_shuffle.
13453 int TgtElemArrayIdx;
13454 int InputSize = Input.getValueType().getScalarSizeInBits();
13455 int OutputSize = N->getValueType(0).getScalarSizeInBits();
13456 if (InputSize + OutputSize == 40)
13457 TgtElemArrayIdx = 0;
13458 else if (InputSize + OutputSize == 72)
13459 TgtElemArrayIdx = 1;
13460 else if (InputSize + OutputSize == 48)
13461 TgtElemArrayIdx = 2;
13462 else if (InputSize + OutputSize == 80)
13463 TgtElemArrayIdx = 3;
13464 else if (InputSize + OutputSize == 96)
13465 TgtElemArrayIdx = 4;
13466 else
13467 return SDValue();
13468
13469 uint64_t CorrectElems = TargetElems[TgtElemArrayIdx];
13470 CorrectElems = DAG.getDataLayout().isLittleEndian()
13471 ? CorrectElems & 0x0F0F0F0F0F0F0F0F
13472 : CorrectElems & 0xF0F0F0F0F0F0F0F0;
13473 if (Elems != CorrectElems) {
13474 return addShuffleForVecExtend(N, DAG, Input, Elems, CorrectElems);
13475 }
13476
13477 // Regular lowering will catch cases where a shuffle is not needed.
13478 return SDValue();
13479}
13480
13481// Look for the pattern of a load from a narrow width to i128, feeding
13482// into a BUILD_VECTOR of v1i128. Replace this sequence with a PPCISD node
13483// (LXVRZX). This node represents a zero extending load that will be matched
13484// to the Load VSX Vector Rightmost instructions.
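// For example (a sketch): (v1i128 (build_vector (zextload i64->i128 [p])))
// becomes (v1i128 (LXVRZX chain, p, 64)), which the Load VSX Vector
// Rightmost patterns (lxvrbx/lxvrhx/lxvrwx/lxvrdx) can then match.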
13485 static SDValue combineBVZEXTLOAD(SDNode *N, SelectionDAG &DAG) {
13486 SDLoc DL(N);
13487
13488 // This combine is only eligible for a BUILD_VECTOR of v1i128.
13489 if (N->getValueType(0) != MVT::v1i128)
13490 return SDValue();
13491
13492 SDValue Operand = N->getOperand(0);
13493 // Proceed with the transformation if the operand to the BUILD_VECTOR
13494 // is a load instruction.
13495 if (Operand.getOpcode() != ISD::LOAD)
13496 return SDValue();
13497
13498 LoadSDNode *LD = dyn_cast<LoadSDNode>(Operand);
13499 EVT MemoryType = LD->getMemoryVT();
13500
13501 // This transformation is only valid if we are loading either a byte,
13502 // halfword, word, or doubleword.
13503 bool ValidLDType = MemoryType == MVT::i8 || MemoryType == MVT::i16 ||
13504 MemoryType == MVT::i32 || MemoryType == MVT::i64;
13505
13506 // Ensure that the load from the narrow width is being zero extended to i128.
13507 if (!ValidLDType ||
13508 (LD->getExtensionType() != ISD::ZEXTLOAD &&
13509 LD->getExtensionType() != ISD::EXTLOAD))
13510 return SDValue();
13511
13512 SDValue LoadOps[] = {
13513 LD->getChain(), LD->getBasePtr(),
13514 DAG.getIntPtrConstant(MemoryType.getScalarSizeInBits(), DL)};
13515
13518 LoadOps, MemoryType, LD->getMemOperand());
13519}
13520
13521SDValue PPCTargetLowering::DAGCombineBuildVector(SDNode *N,
13522 DAGCombinerInfo &DCI) const {
13523 assert(N->getOpcode() == ISD::BUILD_VECTOR &&
13524 "Should be called with a BUILD_VECTOR node");
13525
13526 SelectionDAG &DAG = DCI.DAG;
13527 SDLoc dl(N);
13528
13529 if (!Subtarget.hasVSX())
13530 return SDValue();
13531
13532 // The target independent DAG combiner will leave a build_vector of
13533 // float-to-int conversions intact. We can generate MUCH better code for
13534 // a float-to-int conversion of a vector of floats.
13535 SDValue FirstInput = N->getOperand(0);
13536 if (FirstInput.getOpcode() == PPCISD::MFVSR) {
13537 SDValue Reduced = combineElementTruncationToVectorTruncation(N, DCI);
13538 if (Reduced)
13539 return Reduced;
13540 }
13541
13542 // If we're building a vector out of consecutive loads, just load that
13543 // vector type.
13544 SDValue Reduced = combineBVOfConsecutiveLoads(N, DAG);
13545 if (Reduced)
13546 return Reduced;
13547
13548 // If we're building a vector out of extended elements from another vector
13549 // we have P9 vector integer extend instructions. The code assumes legal
13550 // input types (i.e. it can't handle things like v4i16) so do not run before
13551 // legalization.
13552 if (Subtarget.hasP9Altivec() && !DCI.isBeforeLegalize()) {
13553 Reduced = combineBVOfVecSExt(N, DAG);
13554 if (Reduced)
13555 return Reduced;
13556 }
13557
13558 // On Power10, the Load VSX Vector Rightmost instructions can be utilized
13559 // if this is a BUILD_VECTOR of v1i128, and if the operand to the BUILD_VECTOR
13560 // is a load from <valid narrow width> to i128.
13561 if (Subtarget.isISA3_1()) {
13562 SDValue BVOfZLoad = combineBVZEXTLOAD(N, DAG);
13563 if (BVOfZLoad)
13564 return BVOfZLoad;
13565 }
13566
13567 if (N->getValueType(0) != MVT::v2f64)
13568 return SDValue();
13569
13570 // Looking for:
13571 // (build_vector ([su]int_to_fp (extractelt 0)), ([su]int_to_fp (extractelt 1)))
13572 if (FirstInput.getOpcode() != ISD::SINT_TO_FP &&
13573 FirstInput.getOpcode() != ISD::UINT_TO_FP)
13574 return SDValue();
13575 if (N->getOperand(1).getOpcode() != ISD::SINT_TO_FP &&
13576 N->getOperand(1).getOpcode() != ISD::UINT_TO_FP)
13577 return SDValue();
13578 if (FirstInput.getOpcode() != N->getOperand(1).getOpcode())
13579 return SDValue();
13580
13581 SDValue Ext1 = FirstInput.getOperand(0);
13582 SDValue Ext2 = N->getOperand(1).getOperand(0);
13583 if (Ext1.getOpcode() != ISD::EXTRACT_VECTOR_ELT ||
13584 Ext2.getOpcode() != ISD::EXTRACT_VECTOR_ELT)
13585 return SDValue();
13586
13587 ConstantSDNode *Ext1Op = dyn_cast<ConstantSDNode>(Ext1.getOperand(1));
13588 ConstantSDNode *Ext2Op = dyn_cast<ConstantSDNode>(Ext2.getOperand(1));
13589 if (!Ext1Op || !Ext2Op)
13590 return SDValue();
13591 if (Ext1.getOperand(0).getValueType() != MVT::v4i32 ||
13592 Ext1.getOperand(0) != Ext2.getOperand(0))
13593 return SDValue();
13594
13595 int FirstElem = Ext1Op->getZExtValue();
13596 int SecondElem = Ext2Op->getZExtValue();
13597 int SubvecIdx;
13598 if (FirstElem == 0 && SecondElem == 1)
13599 SubvecIdx = Subtarget.isLittleEndian() ? 1 : 0;
13600 else if (FirstElem == 2 && SecondElem == 3)
13601 SubvecIdx = Subtarget.isLittleEndian() ? 0 : 1;
13602 else
13603 return SDValue();
13604
13605 SDValue SrcVec = Ext1.getOperand(0);
13606 auto NodeType = (N->getOperand(1).getOpcode() == ISD::SINT_TO_FP) ?
13607 PPCISD::SINT_VEC_TO_FP : PPCISD::UINT_VEC_TO_FP;
13608 return DAG.getNode(NodeType, dl, MVT::v2f64,
13609 SrcVec, DAG.getIntPtrConstant(SubvecIdx, dl));
13610}
13611
13612SDValue PPCTargetLowering::combineFPToIntToFP(SDNode *N,
13613 DAGCombinerInfo &DCI) const {
13614 assert((N->getOpcode() == ISD::SINT_TO_FP ||
13615 N->getOpcode() == ISD::UINT_TO_FP) &&
13616 "Need an int -> FP conversion node here");
13617
13618 if (useSoftFloat() || !Subtarget.has64BitSupport())
13619 return SDValue();
13620
13621 SelectionDAG &DAG = DCI.DAG;
13622 SDLoc dl(N);
13623 SDValue Op(N, 0);
13624
13625 // Don't handle ppc_fp128 here or conversions that are out-of-range capable
13626 // from the hardware.
13627 if (Op.getValueType() != MVT::f32 && Op.getValueType() != MVT::f64)
13628 return SDValue();
13629 if (!Op.getOperand(0).getValueType().isSimple())
13630 return SDValue();
13631 if (Op.getOperand(0).getValueType().getSimpleVT() <= MVT(MVT::i1) ||
13632 Op.getOperand(0).getValueType().getSimpleVT() > MVT(MVT::i64))
13633 return SDValue();
13634
13635 SDValue FirstOperand(Op.getOperand(0));
13636 bool SubWordLoad = FirstOperand.getOpcode() == ISD::LOAD &&
13637 (FirstOperand.getValueType() == MVT::i8 ||
13638 FirstOperand.getValueType() == MVT::i16);
13639 if (Subtarget.hasP9Vector() && Subtarget.hasP9Altivec() && SubWordLoad) {
13640 bool Signed = N->getOpcode() == ISD::SINT_TO_FP;
13641 bool DstDouble = Op.getValueType() == MVT::f64;
13642 unsigned ConvOp = Signed ?
13643 (DstDouble ? PPCISD::FCFID : PPCISD::FCFIDS) :
13644 (DstDouble ? PPCISD::FCFIDU : PPCISD::FCFIDUS);
13645 SDValue WidthConst =
13646 DAG.getIntPtrConstant(FirstOperand.getValueType() == MVT::i8 ? 1 : 2,
13647 dl, false);
13648 LoadSDNode *LDN = cast<LoadSDNode>(FirstOperand.getNode());
13649 SDValue Ops[] = { LDN->getChain(), LDN->getBasePtr(), WidthConst };
13650 SDValue Ld = DAG.getMemIntrinsicNode(PPCISD::LXSIZX, dl,
13651 DAG.getVTList(MVT::f64, MVT::Other),
13652 Ops, MVT::i8, LDN->getMemOperand());
13653
13654 // For signed conversion, we need to sign-extend the value in the VSR
13655 if (Signed) {
13656 SDValue ExtOps[] = { Ld, WidthConst };
13657 SDValue Ext = DAG.getNode(PPCISD::VEXTS, dl, MVT::f64, ExtOps);
13658 return DAG.getNode(ConvOp, dl, DstDouble ? MVT::f64 : MVT::f32, Ext);
13659 } else
13660 return DAG.getNode(ConvOp, dl, DstDouble ? MVT::f64 : MVT::f32, Ld);
13661 }
13662
13663
13664 // For i32 intermediate values, unfortunately, the conversion functions
13665 // leave the upper 32 bits of the value undefined. Within the set of
13666 // scalar instructions, we have no method for zero- or sign-extending the
13667 // value. Thus, we cannot handle i32 intermediate values here.
13668 if (Op.getOperand(0).getValueType() == MVT::i32)
13669 return SDValue();
13670
13671 assert((Op.getOpcode() == ISD::SINT_TO_FP || Subtarget.hasFPCVT()) &&
13672 "UINT_TO_FP is supported only with FPCVT");
13673
13674 // If we have FCFIDS, then use it when converting to single-precision.
13675 // Otherwise, convert to double-precision and then round.
13676 unsigned FCFOp = (Subtarget.hasFPCVT() && Op.getValueType() == MVT::f32)
13677 ? (Op.getOpcode() == ISD::UINT_TO_FP ? PPCISD::FCFIDUS
13678 : PPCISD::FCFIDS)
13679 : (Op.getOpcode() == ISD::UINT_TO_FP ? PPCISD::FCFIDU
13680 : PPCISD::FCFID);
13681 MVT FCFTy = (Subtarget.hasFPCVT() && Op.getValueType() == MVT::f32)
13682 ? MVT::f32
13683 : MVT::f64;
13684
13685 // If we're converting from a float, to an int, and back to a float again,
13686 // then we don't need the store/load pair at all.
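// For example (a sketch): (f64 (sint_to_fp (i64 (fp_to_sint f64 %x))))
// can lower to fctidz followed by fcfid, keeping the value in floating-point
// registers rather than storing the i64 and reloading it.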
13687 if ((Op.getOperand(0).getOpcode() == ISD::FP_TO_UINT &&
13688 Subtarget.hasFPCVT()) ||
13689 (Op.getOperand(0).getOpcode() == ISD::FP_TO_SINT)) {
13690 SDValue Src = Op.getOperand(0).getOperand(0);
13691 if (Src.getValueType() == MVT::f32) {
13692 Src = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Src);
13693 DCI.AddToWorklist(Src.getNode());
13694 } else if (Src.getValueType() != MVT::f64) {
13695 // Make sure that we don't pick up a ppc_fp128 source value.
13696 return SDValue();
13697 }
13698
13699 unsigned FCTOp =
13700 Op.getOperand(0).getOpcode() == ISD::FP_TO_SINT ? PPCISD::FCTIDZ :
13701 PPCISD::FCTIDUZ;
13702
13703 SDValue Tmp = DAG.getNode(FCTOp, dl, MVT::f64, Src);
13704 SDValue FP = DAG.getNode(FCFOp, dl, FCFTy, Tmp);
13705
13706 if (Op.getValueType() == MVT::f32 && !Subtarget.hasFPCVT()) {
13707 FP = DAG.getNode(ISD::FP_ROUND, dl,
13708 MVT::f32, FP, DAG.getIntPtrConstant(0, dl));
13709 DCI.AddToWorklist(FP.getNode());
13710 }
13711
13712 return FP;
13713 }
13714
13715 return SDValue();
13716}
13717
13718// expandVSXLoadForLE - Convert VSX loads (which may be intrinsics for
13719// builtins) into loads with swaps.
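// For example (a sketch, little-endian): (v4i32 (load p)) becomes
// (bitcast (XXSWAPD (LXVD2X p))), since lxvd2x loads the two doublewords in
// big-endian element order and xxswapd restores the little-endian layout.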
13720 SDValue PPCTargetLowering::expandVSXLoadForLE(SDNode *N,
13721 DAGCombinerInfo &DCI) const {
13722 SelectionDAG &DAG = DCI.DAG;
13723 SDLoc dl(N);
13724 SDValue Chain;
13725 SDValue Base;
13726 MachineMemOperand *MMO;
13727
13728 switch (N->getOpcode()) {
13729 default:
13730 llvm_unreachable("Unexpected opcode for little endian VSX load");
13731 case ISD::LOAD: {
13732 LoadSDNode *LD = cast<LoadSDNode>(N);
13733 Chain = LD->getChain();
13734 Base = LD->getBasePtr();
13735 MMO = LD->getMemOperand();
13736 // If the MMO suggests this isn't a load of a full vector, leave
13737 // things alone. For a built-in, we have to make the change for
13738 // correctness, so if there is a size problem that will be a bug.
13739 if (MMO->getSize() < 16)
13740 return SDValue();
13741 break;
13742 }
13743 case ISD::INTRINSIC_W_CHAIN: {
13744 MemIntrinsicSDNode *Intrin = cast<MemIntrinsicSDNode>(N);
13745 Chain = Intrin->getChain();
13746 // Similarly to the store case below, Intrin->getBasePtr() doesn't get
13747 // us what we want. Get operand 2 instead.
13748 Base = Intrin->getOperand(2);
13749 MMO = Intrin->getMemOperand();
13750 break;
13751 }
13752 }
13753
13754 MVT VecTy = N->getValueType(0).getSimpleVT();
13755
13756 // Do not expand to PPCISD::LXVD2X + PPCISD::XXSWAPD when the load is
13757 // aligned and the type is a vector with elements up to 4 bytes
13758 if (Subtarget.needsSwapsForVSXMemOps() && MMO->getAlign() >= Align(16) &&
13759 VecTy.getScalarSizeInBits() <= 32) {
13760 return SDValue();
13761 }
13762
13763 SDValue LoadOps[] = { Chain, Base };
13764 SDValue Load = DAG.getMemIntrinsicNode(PPCISD::LXVD2X, dl,
13765 DAG.getVTList(MVT::v2f64, MVT::Other),
13766 LoadOps, MVT::v2f64, MMO);
13767
13768 DCI.AddToWorklist(Load.getNode());
13769 Chain = Load.getValue(1);
13770 SDValue Swap = DAG.getNode(
13771 PPCISD::XXSWAPD, dl, DAG.getVTList(MVT::v2f64, MVT::Other), Chain, Load);
13772 DCI.AddToWorklist(Swap.getNode());
13773
13774 // Add a bitcast if the resulting load type doesn't match v2f64.
13775 if (VecTy != MVT::v2f64) {
13776 SDValue N = DAG.getNode(ISD::BITCAST, dl, VecTy, Swap);
13777 DCI.AddToWorklist(N.getNode());
13778 // Package {bitcast value, swap's chain} to match Load's shape.
13779 return DAG.getNode(ISD::MERGE_VALUES, dl, DAG.getVTList(VecTy, MVT::Other),
13780 N, Swap.getValue(1));
13781 }
13782
13783 return Swap;
13784}
13785
13786// expandVSXStoreForLE - Convert VSX stores (which may be intrinsics for
13787// builtins) into stores with swaps.
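// For example (a sketch, little-endian): (store v4i32 %v, p) becomes
// (STXVD2X (XXSWAPD (bitcast %v to v2f64)), p), mirroring the load case
// above.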
13788 SDValue PPCTargetLowering::expandVSXStoreForLE(SDNode *N,
13789 DAGCombinerInfo &DCI) const {
13790 SelectionDAG &DAG = DCI.DAG;
13791 SDLoc dl(N);
13792 SDValue Chain;
13793 SDValue Base;
13794 unsigned SrcOpnd;
13795 MachineMemOperand *MMO;
13796
13797 switch (N->getOpcode()) {
13798 default:
13799 llvm_unreachable("Unexpected opcode for little endian VSX store");
13800 case ISD::STORE: {
13801 StoreSDNode *ST = cast<StoreSDNode>(N);
13802 Chain = ST->getChain();
13803 Base = ST->getBasePtr();
13804 MMO = ST->getMemOperand();
13805 SrcOpnd = 1;
13806 // If the MMO suggests this isn't a store of a full vector, leave
13807 // things alone. For a built-in, we have to make the change for
13808 // correctness, so if there is a size problem that will be a bug.
13809 if (MMO->getSize() < 16)
13810 return SDValue();
13811 break;
13812 }
13813 case ISD::INTRINSIC_VOID: {
13814 MemIntrinsicSDNode *Intrin = cast<MemIntrinsicSDNode>(N);
13815 Chain = Intrin->getChain();
13816 // Intrin->getBasePtr() oddly does not get what we want.
13817 Base = Intrin->getOperand(3);
13818 MMO = Intrin->getMemOperand();
13819 SrcOpnd = 2;
13820 break;
13821 }
13822 }
13823
13824 SDValue Src = N->getOperand(SrcOpnd);
13825 MVT VecTy = Src.getValueType().getSimpleVT();
13826
13827 // Do not expand to PPCISD::XXSWAPD and PPCISD::STXVD2X when the store is
13828 // aligned and the type is a vector with elements up to 4 bytes
13829 if (Subtarget.needsSwapsForVSXMemOps() && MMO->getAlign() >= Align(16) &&
13830 VecTy.getScalarSizeInBits() <= 32) {
13831 return SDValue();
13832 }
13833
13834 // All stores are done as v2f64 with a possible bitcast.
13835 if (VecTy != MVT::v2f64) {
13836 Src = DAG.getNode(ISD::BITCAST, dl, MVT::v2f64, Src);
13837 DCI.AddToWorklist(Src.getNode());
13838 }
13839
13840 SDValue Swap = DAG.getNode(PPCISD::XXSWAPD, dl,
13841 DAG.getVTList(MVT::v2f64, MVT::Other), Chain, Src);
13842 DCI.AddToWorklist(Swap.getNode());
13843 Chain = Swap.getValue(1);
13844 SDValue StoreOps[] = { Chain, Swap, Base };
13845 SDValue Store = DAG.getMemIntrinsicNode(PPCISD::STXVD2X, dl,
13846 DAG.getVTList(MVT::Other),
13847 StoreOps, VecTy, MMO);
13848 DCI.AddToWorklist(Store.getNode());
13849 return Store;
13850}
13851
13852// Handle DAG combine for STORE (FP_TO_INT F).
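// For example (a sketch): (store (fp_to_sint f64 %x), p) can become
// FP_TO_SINT_IN_VSR followed by ST_VSR_SCAL_INT, converting in a VSR and
// storing directly from that register, so the integer value never has to
// move through a GPR.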
13853SDValue PPCTargetLowering::combineStoreFPToInt(SDNode *N,
13854 DAGCombinerInfo &DCI) const {
13855
13856 SelectionDAG &DAG = DCI.DAG;
13857 SDLoc dl(N);
13858 unsigned Opcode = N->getOperand(1).getOpcode();
13859
13860 assert((Opcode == ISD::FP_TO_SINT || Opcode == ISD::FP_TO_UINT)
13861 && "Not a FP_TO_INT Instruction!");
13862
13863 SDValue Val = N->getOperand(1).getOperand(0);
13864 EVT Op1VT = N->getOperand(1).getValueType();
13865 EVT ResVT = Val.getValueType();
13866
13867 if (!isTypeLegal(ResVT))
13868 return SDValue();
13869
13870 // Only perform the combine for conversions to i64/i32, or to i16/i8 on Power9.
13871 bool ValidTypeForStoreFltAsInt =
13872 (Op1VT == MVT::i32 || Op1VT == MVT::i64 ||
13873 (Subtarget.hasP9Vector() && (Op1VT == MVT::i16 || Op1VT == MVT::i8)));
13874
13875 if (ResVT == MVT::ppcf128 || !Subtarget.hasP8Vector() ||
13876 cast<StoreSDNode>(N)->isTruncatingStore() || !ValidTypeForStoreFltAsInt)
13877 return SDValue();
13878
13879 // Extend f32 values to f64
13880 if (ResVT.getScalarSizeInBits() == 32) {
13881 Val = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Val);
13882 DCI.AddToWorklist(Val.getNode());
13883 }
13884
13885 // Set signed or unsigned conversion opcode.
13886 unsigned ConvOpcode = (Opcode == ISD::FP_TO_SINT) ?
13887 PPCISD::FP_TO_SINT_IN_VSR :
13888 PPCISD::FP_TO_UINT_IN_VSR;
13889
13890 Val = DAG.getNode(ConvOpcode,
13891 dl, ResVT == MVT::f128 ? MVT::f128 : MVT::f64, Val);
13892 DCI.AddToWorklist(Val.getNode());
13893
13894 // Set number of bytes being converted.
13895 unsigned ByteSize = Op1VT.getScalarSizeInBits() / 8;
13896 SDValue Ops[] = { N->getOperand(0), Val, N->getOperand(2),
13897 DAG.getIntPtrConstant(ByteSize, dl, false),
13898 DAG.getValueType(Op1VT) };
13899
13900 Val = DAG.getMemIntrinsicNode(PPCISD::ST_VSR_SCAL_INT, dl,
13901 DAG.getVTList(MVT::Other), Ops,
13902 cast<StoreSDNode>(N)->getMemoryVT(),
13903 cast<StoreSDNode>(N)->getMemOperand());
13904
13905 DCI.AddToWorklist(Val.getNode());
13906 return Val;
13907}
13908
13909static bool isAlternatingShuffMask(const ArrayRef<int> &Mask, int NumElts) {
13910 // Check that the source of the element keeps flipping
13911 // (i.e. Mask[i] < NumElts -> Mask[i+1] >= NumElts).
13912 bool PrevElemFromFirstVec = Mask[0] < NumElts;
13913 for (int i = 1, e = Mask.size(); i < e; i++) {
13914 if (PrevElemFromFirstVec && Mask[i] < NumElts)
13915 return false;
13916 if (!PrevElemFromFirstVec && Mask[i] >= NumElts)
13917 return false;
13918 PrevElemFromFirstVec = !PrevElemFromFirstVec;
13919 }
13920 return true;
13921}
13922
13923static bool isSplatBV(SDValue Op) {
13924 if (Op.getOpcode() != ISD::BUILD_VECTOR)
13925 return false;
13926 SDValue FirstOp;
13927
13928 // Find first non-undef input.
13929 for (int i = 0, e = Op.getNumOperands(); i < e; i++) {
13930 FirstOp = Op.getOperand(i);
13931 if (!FirstOp.isUndef())
13932 break;
13933 }
13934
13935 // All inputs are undef or the same as the first non-undef input.
13936 for (int i = 1, e = Op.getNumOperands(); i < e; i++)
13937 if (Op.getOperand(i) != FirstOp && !Op.getOperand(i).isUndef())
13938 return false;
13939 return true;
13940}
13941
13942 static SDValue isScalarToVec(SDValue Op) {
13943 if (Op.getOpcode() == ISD::SCALAR_TO_VECTOR)
13944 return Op;
13945 if (Op.getOpcode() != ISD::BITCAST)
13946 return SDValue();
13947 Op = Op.getOperand(0);
13948 if (Op.getOpcode() == ISD::SCALAR_TO_VECTOR)
13949 return Op;
13950 return SDValue();
13951}
13952
13953 static void fixupShuffleMaskForPermutedSToV(SmallVectorImpl<int> &ShuffV,
13954 int LHSMaxIdx, int RHSMinIdx,
13955 int RHSMaxIdx, int HalfVec) {
13956 for (int i = 0, e = ShuffV.size(); i < e; i++) {
13957 int Idx = ShuffV[i];
13958 if ((Idx >= 0 && Idx < LHSMaxIdx) || (Idx >= RHSMinIdx && Idx < RHSMaxIdx))
13959 ShuffV[i] += HalfVec;
13960 }
13961}
13962
13963// Replace a SCALAR_TO_VECTOR with a SCALAR_TO_VECTOR_PERMUTED except if
13964// the original is:
13965// (<n x Ty> (scalar_to_vector (Ty (extract_elt <n x Ty> %a, C))))
13966// In such a case, just change the shuffle mask to extract the element
13967// from the permuted index.
13968 static SDValue getSToVPermuted(SDValue OrigSToV, SelectionDAG &DAG) {
13969 SDLoc dl(OrigSToV);
13970 EVT VT = OrigSToV.getValueType();
13971 assert(OrigSToV.getOpcode() == ISD::SCALAR_TO_VECTOR &&
13972 "Expecting a SCALAR_TO_VECTOR here");
13973 SDValue Input = OrigSToV.getOperand(0);
13974
13975 if (Input.getOpcode() == ISD::EXTRACT_VECTOR_ELT) {
13976 ConstantSDNode *Idx = dyn_cast<ConstantSDNode>(Input.getOperand(1));
13977 SDValue OrigVector = Input.getOperand(0);
13978
13979 // Can't handle non-const element indices or different vector types
13980 // for the input to the extract and the output of the scalar_to_vector.
13981 if (Idx && VT == OrigVector.getValueType()) {
13982 SmallVector<int, 16> NewMask(VT.getVectorNumElements(), -1);
13983 NewMask[VT.getVectorNumElements() / 2] = Idx->getZExtValue();
13984 return DAG.getVectorShuffle(VT, dl, OrigVector, OrigVector, NewMask);
13985 }
13986 }
13987 return DAG.getNode(PPCISD::SCALAR_TO_VECTOR_PERMUTED, dl, VT,
13988 OrigSToV.getOperand(0));
13989}
13990
13991// On little endian subtargets, combine shuffles such as:
13992// vector_shuffle<16,1,17,3,18,5,19,7,20,9,21,11,22,13,23,15>, <zero>, %b
13993// into:
13994// vector_shuffle<16,0,17,1,18,2,19,3,20,4,21,5,22,6,23,7>, <zero>, %b
13995// because the latter can be matched to a single instruction merge.
13996// Furthermore, SCALAR_TO_VECTOR on little endian always involves a permute
13997// to put the value into element zero. Adjust the shuffle mask so that the
13998// vector can remain in permuted form (to prevent a swap prior to a shuffle).
13999SDValue PPCTargetLowering::combineVectorShuffle(ShuffleVectorSDNode *SVN,
14000 SelectionDAG &DAG) const {
14001 SDValue LHS = SVN->getOperand(0);
14002 SDValue RHS = SVN->getOperand(1);
14003 auto Mask = SVN->getMask();
14004 int NumElts = LHS.getValueType().getVectorNumElements();
14005 SDValue Res(SVN, 0);
14006 SDLoc dl(SVN);
14007
14008 // None of these combines are useful on big endian systems since the ISA
14009 // already has a big endian bias.
14010 if (!Subtarget.isLittleEndian() || !Subtarget.hasVSX())
14011 return Res;
14012
14013 // If this is not a shuffle of a shuffle and the first element comes from
14014 // the second vector, canonicalize to the commuted form. This will make it
14015 // more likely to match one of the single instruction patterns.
14016 if (Mask[0] >= NumElts && LHS.getOpcode() != ISD::VECTOR_SHUFFLE &&
14017 RHS.getOpcode() != ISD::VECTOR_SHUFFLE) {
14018 std::swap(LHS, RHS);
14019 Res = DAG.getCommutedVectorShuffle(*SVN);
14020 Mask = cast<ShuffleVectorSDNode>(Res)->getMask();
14021 }
14022
14023 // Adjust the shuffle mask if either input vector comes from a
14024 // SCALAR_TO_VECTOR and keep the respective input vector in permuted
14025 // form (to prevent the need for a swap).
14026 SmallVector<int, 16> ShuffV(Mask.begin(), Mask.end());
14027 SDValue SToVLHS = isScalarToVec(LHS);
14028 SDValue SToVRHS = isScalarToVec(RHS);
14029 if (SToVLHS || SToVRHS) {
14030 int NumEltsIn = SToVLHS ? SToVLHS.getValueType().getVectorNumElements()
14031 : SToVRHS.getValueType().getVectorNumElements();
14032 int NumEltsOut = ShuffV.size();
14033
14034 // Initially assume that neither input is permuted. These will be adjusted
14035 // accordingly if either input is.
14036 int LHSMaxIdx = -1;
14037 int RHSMinIdx = -1;
14038 int RHSMaxIdx = -1;
14039 int HalfVec = LHS.getValueType().getVectorNumElements() / 2;
14040
14041 // Get the permuted scalar to vector nodes for the source(s) that come from
14042 // ISD::SCALAR_TO_VECTOR.
14043 if (SToVLHS) {
14044 // Set up the values for the shuffle vector fixup.
14045 LHSMaxIdx = NumEltsOut / NumEltsIn;
14046 SToVLHS = getSToVPermuted(SToVLHS, DAG);
14047 if (SToVLHS.getValueType() != LHS.getValueType())
14048 SToVLHS = DAG.getBitcast(LHS.getValueType(), SToVLHS);
14049 LHS = SToVLHS;
14050 }
14051 if (SToVRHS) {
14052 RHSMinIdx = NumEltsOut;
14053 RHSMaxIdx = NumEltsOut / NumEltsIn + RHSMinIdx;
14054 SToVRHS = getSToVPermuted(SToVRHS, DAG);
14055 if (SToVRHS.getValueType() != RHS.getValueType())
14056 SToVRHS = DAG.getBitcast(RHS.getValueType(), SToVRHS);
14057 RHS = SToVRHS;
14058 }
14059
14060 // Fix up the shuffle mask to reflect where the desired element actually is.
14061 // The minimum and maximum indices that correspond to element zero for both
14062 // the LHS and RHS are computed and will control which shuffle mask entries
14063 // are to be changed. For example, if the RHS is permuted, any shuffle mask
14064 // entries in the range [RHSMinIdx,RHSMaxIdx) will be incremented by
14065 // HalfVec to refer to the corresponding element in the permuted vector.
14066 fixupShuffleMaskForPermutedSToV(ShuffV, LHSMaxIdx, RHSMinIdx, RHSMaxIdx,
14067 HalfVec);
14068 Res = DAG.getVectorShuffle(SVN->getValueType(0), dl, LHS, RHS, ShuffV);
14069
14070 // We may have simplified away the shuffle. We won't be able to do anything
14071 // further with it here.
14072 if (!isa<ShuffleVectorSDNode>(Res))
14073 return Res;
14074 Mask = cast<ShuffleVectorSDNode>(Res)->getMask();
14075 }
14076
14077 // The common case after we commuted the shuffle is that the RHS is a splat
14078 // and we have elements coming in from the splat at indices that are not
14079 // conducive to using a merge.
14080 // Example:
14081 // vector_shuffle<0,17,1,19,2,21,3,23,4,25,5,27,6,29,7,31> t1, <zero>
14082 if (!isSplatBV(RHS))
14083 return Res;
14084
14085 // We are looking for a mask such that all even elements are from
14086 // one vector and all odd elements from the other.
14087 if (!isAlternatingShuffMask(Mask, NumElts))
14088 return Res;
14089
14090 // Adjust the mask so we are pulling in the same index from the splat
14091 // as the index from the interesting vector in consecutive elements.
14092 // Example (even elements from first vector):
14093 // vector_shuffle<0,16,1,17,2,18,3,19,4,20,5,21,6,22,7,23> t1, <zero>
14094 if (Mask[0] < NumElts)
14095 for (int i = 1, e = Mask.size(); i < e; i += 2)
14096 ShuffV[i] = (ShuffV[i - 1] + NumElts);
14097 // Example (odd elements from first vector):
14098 // vector_shuffle<16,0,17,1,18,2,19,3,20,4,21,5,22,6,23,7> t1, <zero>
14099 else
14100 for (int i = 0, e = Mask.size(); i < e; i += 2)
14101 ShuffV[i] = (ShuffV[i + 1] + NumElts);
14102
14103 // If the RHS has undefs, we need to remove them since we may have created
14104 // a shuffle that adds those instead of the splat value.
14105 SDValue SplatVal = cast<BuildVectorSDNode>(RHS.getNode())->getSplatValue();
14106 RHS = DAG.getSplatBuildVector(RHS.getValueType(), dl, SplatVal);
14107
14108 Res = DAG.getVectorShuffle(SVN->getValueType(0), dl, LHS, RHS, ShuffV);
14109 return Res;
14110}
14111
14112SDValue PPCTargetLowering::combineVReverseMemOP(ShuffleVectorSDNode *SVN,
14113 LSBaseSDNode *LSBase,
14114 DAGCombinerInfo &DCI) const {
14115 assert((ISD::isNormalLoad(LSBase) || ISD::isNormalStore(LSBase)) &&
14116 "Not a reverse memop pattern!");
14117
14118 auto IsElementReverse = [](const ShuffleVectorSDNode *SVN) -> bool {
14119 auto Mask = SVN->getMask();
14120 int i = 0;
14121 auto I = Mask.rbegin();
14122 auto E = Mask.rend();
14123
14124 for (; I != E; ++I) {
14125 if (*I != i)
14126 return false;
14127 i++;
14128 }
14129 return true;
14130 };
14131
14132 SelectionDAG &DAG = DCI.DAG;
14133 EVT VT = SVN->getValueType(0);
14134
14135 if (!isTypeLegal(VT) || !Subtarget.isLittleEndian() || !Subtarget.hasVSX())
14136 return SDValue();
14137
14138 // Before P9, the PPCVSXSwapRemoval pass rewrites the element order itself
14139 // (see the comment in PPCVSXSwapRemoval.cpp). This combine conflicts with
14140 // that pass, so we do not perform it before P9.
14141 if (!Subtarget.hasP9Vector())
14142 return SDValue();
14143
14144 if (!IsElementReverse(SVN))
14145 return SDValue();
14146
14147 if (LSBase->getOpcode() == ISD::LOAD) {
14148 SDLoc dl(SVN);
14149 SDValue LoadOps[] = {LSBase->getChain(), LSBase->getBasePtr()};
14150 return DAG.getMemIntrinsicNode(
14151 PPCISD::LOAD_VEC_BE, dl, DAG.getVTList(VT, MVT::Other), LoadOps,
14152 LSBase->getMemoryVT(), LSBase->getMemOperand());
14153 }
14154
14155 if (LSBase->getOpcode() == ISD::STORE) {
14156 SDLoc dl(LSBase);
14157 SDValue StoreOps[] = {LSBase->getChain(), SVN->getOperand(0),
14158 LSBase->getBasePtr()};
14159 return DAG.getMemIntrinsicNode(
14160 PPCISD::STORE_VEC_BE, dl, DAG.getVTList(MVT::Other), StoreOps,
14161 LSBase->getMemoryVT(), LSBase->getMemOperand());
14162 }
14163
14164 llvm_unreachable("Expected a load or store node here");
14165}
14166
14167 SDValue PPCTargetLowering::PerformDAGCombine(SDNode *N,
14168 DAGCombinerInfo &DCI) const {
14169 SelectionDAG &DAG = DCI.DAG;
14170 SDLoc dl(N);
14171 switch (N->getOpcode()) {
14172 default: break;
14173 case ISD::ADD:
14174 return combineADD(N, DCI);
14175 case ISD::SHL:
14176 return combineSHL(N, DCI);
14177 case ISD::SRA:
14178 return combineSRA(N, DCI);
14179 case ISD::SRL:
14180 return combineSRL(N, DCI);
14181 case ISD::MUL:
14182 return combineMUL(N, DCI);
14183 case ISD::FMA:
14184 case PPCISD::FNMSUB:
14185 return combineFMALike(N, DCI);
14186 case PPCISD::SHL:
14187 if (isNullConstant(N->getOperand(0))) // 0 << V -> 0.
14188 return N->getOperand(0);
14189 break;
14190 case PPCISD::SRL:
14191 if (isNullConstant(N->getOperand(0))) // 0 >>u V -> 0.
14192 return N->getOperand(0);
14193 break;
14194 case PPCISD::SRA:
14195 if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(N->getOperand(0))) {
14196 if (C->isNullValue() || // 0 >>s V -> 0.
14197 C->isAllOnesValue()) // -1 >>s V -> -1.
14198 return N->getOperand(0);
14199 }
14200 break;
14201 case ISD::SIGN_EXTEND:
14202 case ISD::ZERO_EXTEND:
14203 case ISD::ANY_EXTEND:
14204 return DAGCombineExtBoolTrunc(N, DCI);
14205 case ISD::TRUNCATE:
14206 return combineTRUNCATE(N, DCI);
14207 case ISD::SETCC:
14208 if (SDValue CSCC = combineSetCC(N, DCI))
14209 return CSCC;
14210 LLVM_FALLTHROUGH;
14211 case ISD::SELECT_CC:
14212 return DAGCombineTruncBoolExt(N, DCI);
14213 case ISD::SINT_TO_FP:
14214 case ISD::UINT_TO_FP:
14215 return combineFPToIntToFP(N, DCI);
14216 case ISD::VECTOR_SHUFFLE:
14217 if (ISD::isNormalLoad(N->getOperand(0).getNode())) {
14218 LSBaseSDNode* LSBase = cast<LSBaseSDNode>(N->getOperand(0));
14219 return combineVReverseMemOP(cast<ShuffleVectorSDNode>(N), LSBase, DCI);
14220 }
14221 return combineVectorShuffle(cast<ShuffleVectorSDNode>(N), DCI.DAG);
14222 case ISD::STORE: {
14223
14224 EVT Op1VT = N->getOperand(1).getValueType();
14225 unsigned Opcode = N->getOperand(1).getOpcode();
14226
14227 if (Opcode == ISD::FP_TO_SINT || Opcode == ISD::FP_TO_UINT) {
14228 SDValue Val= combineStoreFPToInt(N, DCI);
14229 if (Val)
14230 return Val;
14231 }
14232
14233 if (Opcode == ISD::VECTOR_SHUFFLE && ISD::isNormalStore(N)) {
14234 ShuffleVectorSDNode *SVN = cast<ShuffleVectorSDNode>(N->getOperand(1));
14235 SDValue Val= combineVReverseMemOP(SVN, cast<LSBaseSDNode>(N), DCI);
14236 if (Val)
14237 return Val;
14238 }
14239
14240 // Turn STORE (BSWAP) -> sthbrx/stwbrx.
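// For example (a sketch): (store (bswap i32 %x), p) becomes a single
// stwbrx of %x to p, so no separate in-register byte-swap sequence is
// needed.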
14241 if (cast<StoreSDNode>(N)->isUnindexed() && Opcode == ISD::BSWAP &&
14242 N->getOperand(1).getNode()->hasOneUse() &&
14243 (Op1VT == MVT::i32 || Op1VT == MVT::i16 ||
14244 (Subtarget.hasLDBRX() && Subtarget.isPPC64() && Op1VT == MVT::i64))) {
14245
14246 // STBRX can only handle simple types and it makes no sense to store
14247 // fewer than two bytes in byte-reversed order.
14248 EVT mVT = cast<StoreSDNode>(N)->getMemoryVT();
14249 if (mVT.isExtended() || mVT.getSizeInBits() < 16)
14250 break;
14251
14252 SDValue BSwapOp = N->getOperand(1).getOperand(0);
14253 // Do an any-extend to 32-bits if this is a half-word input.
14254 if (BSwapOp.getValueType() == MVT::i16)
14255 BSwapOp = DAG.getNode(ISD::ANY_EXTEND, dl, MVT::i32, BSwapOp);
14256
14257 // If the type of the BSWAP operand is wider than the stored memory width,
14258 // it needs to be shifted right before the STBRX.
14259 if (Op1VT.bitsGT(mVT)) {
14260 int Shift = Op1VT.getSizeInBits() - mVT.getSizeInBits();
14261 BSwapOp = DAG.getNode(ISD::SRL, dl, Op1VT, BSwapOp,
14262 DAG.getConstant(Shift, dl, MVT::i32));
14263 // Need to truncate if this is a bswap of i64 stored as i32/i16.
14264 if (Op1VT == MVT::i64)
14265 BSwapOp = DAG.getNode(ISD::TRUNCATE, dl, MVT::i32, BSwapOp);
14266 }
14267
14268 SDValue Ops[] = {
14269 N->getOperand(0), BSwapOp, N->getOperand(2), DAG.getValueType(mVT)
14270 };
14271 return
14272 DAG.getMemIntrinsicNode(PPCISD::STBRX, dl, DAG.getVTList(MVT::Other),
14273 Ops, cast<StoreSDNode>(N)->getMemoryVT(),
14274 cast<StoreSDNode>(N)->getMemOperand());
14275 }
14276
14277 // STORE Constant:i32<0> -> STORE<trunc to i32> Constant:i64<0>
14278 // This increases the chance of CSE for constant construction.
14279 if (Subtarget.isPPC64() && !DCI.isBeforeLegalize() &&
14280 isa<ConstantSDNode>(N->getOperand(1)) && Op1VT == MVT::i32) {
14281 // Need to sign-extend to 64 bits to handle negative values.
14282 EVT MemVT = cast<StoreSDNode>(N)->getMemoryVT();
14283 uint64_t Val64 = SignExtend64(N->getConstantOperandVal(1),
14284 MemVT.getSizeInBits());
14285 SDValue Const64 = DAG.getConstant(Val64, dl, MVT::i64);
14286
14287 // DAG.getTruncStore() can't be used here because it doesn't accept
14288 // the general (base + offset) addressing mode.
14289 // So we use UpdateNodeOperands and setTruncatingStore instead.
14290 DAG.UpdateNodeOperands(N, N->getOperand(0), Const64, N->getOperand(2),
14291 N->getOperand(3));
14292 cast<StoreSDNode>(N)->setTruncatingStore(true);
14293 return SDValue(N, 0);
14294 }
14295
14296 // For little endian, VSX stores require generating xxswapd/lxvd2x.
14297 // Not needed on ISA 3.0 based CPUs since we have a non-permuting store.
14298 if (Op1VT.isSimple()) {
14299 MVT StoreVT = Op1VT.getSimpleVT();
14300 if (Subtarget.needsSwapsForVSXMemOps() &&
14301 (StoreVT == MVT::v2f64 || StoreVT == MVT::v2i64 ||
14302 StoreVT == MVT::v4f32 || StoreVT == MVT::v4i32))
14303 return expandVSXStoreForLE(N, DCI);
14304 }
14305 break;
14306 }
14307 case ISD::LOAD: {
14308 LoadSDNode *LD = cast<LoadSDNode>(N);
14309 EVT VT = LD->getValueType(0);
14310
14311 // For little endian, VSX loads require generating lxvd2x/xxswapd.
14312 // Not needed on ISA 3.0 based CPUs since we have a non-permuting load.
14313 if (VT.isSimple()) {
14314 MVT LoadVT = VT.getSimpleVT();
14315 if (Subtarget.needsSwapsForVSXMemOps() &&
14316 (LoadVT == MVT::v2f64 || LoadVT == MVT::v2i64 ||
14317 LoadVT == MVT::v4f32 || LoadVT == MVT::v4i32))
14318 return expandVSXLoadForLE(N, DCI);
14319 }
14320
14321 // We sometimes end up with a 64-bit integer load, from which we extract
14322 // two single-precision floating-point numbers. This happens with
14323 // std::complex<float>, and other similar structures, because of the way we
14324 // canonicalize structure copies. However, if we lack direct moves,
14325 // then the final bitcasts from the extracted integer values to the
14326 // floating-point numbers turn into store/load pairs. Even with direct moves,
14327 // just loading the two floating-point numbers is likely better.
14328 auto ReplaceTwoFloatLoad = [&]() {
14329 if (VT != MVT::i64)
14330 return false;
14331
14332 if (LD->getExtensionType() != ISD::NON_EXTLOAD ||
14333 LD->isVolatile())
14334 return false;
14335
14336 // We're looking for a sequence like this:
14337 // t13: i64,ch = load<LD8[%ref.tmp]> t0, t6, undef:i64
14338 // t16: i64 = srl t13, Constant:i32<32>
14339 // t17: i32 = truncate t16
14340 // t18: f32 = bitcast t17
14341 // t19: i32 = truncate t13
14342 // t20: f32 = bitcast t19
14343
14344 if (!LD->hasNUsesOfValue(2, 0))
14345 return false;
14346
14347 auto UI = LD->use_begin();
14348 while (UI.getUse().getResNo() != 0) ++UI;
14349 SDNode *Trunc = *UI++;
14350 while (UI.getUse().getResNo() != 0) ++UI;
14351 SDNode *RightShift = *UI;
14352 if (Trunc->getOpcode() != ISD::TRUNCATE)
14353 std::swap(Trunc, RightShift);
14354
14355 if (Trunc->getOpcode() != ISD::TRUNCATE ||
14356 Trunc->getValueType(0) != MVT::i32 ||
14357 !Trunc->hasOneUse())
14358 return false;
14359 if (RightShift->getOpcode() != ISD::SRL ||
14360 !isa<ConstantSDNode>(RightShift->getOperand(1)) ||
14361 RightShift->getConstantOperandVal(1) != 32 ||
14362 !RightShift->hasOneUse())
14363 return false;
14364
14365 SDNode *Trunc2 = *RightShift->use_begin();
14366 if (Trunc2->getOpcode() != ISD::TRUNCATE ||
14367 Trunc2->getValueType(0) != MVT::i32 ||
14368 !Trunc2->hasOneUse())
14369 return false;
14370
14371 SDNode *Bitcast = *Trunc->use_begin();
14372 SDNode *Bitcast2 = *Trunc2->use_begin();
14373
14374 if (Bitcast->getOpcode() != ISD::BITCAST ||
14375 Bitcast->getValueType(0) != MVT::f32)
14376 return false;
14377 if (Bitcast2->getOpcode() != ISD::BITCAST ||
14378 Bitcast2->getValueType(0) != MVT::f32)
14379 return false;
14380
14381 if (Subtarget.isLittleEndian())
14382 std::swap(Bitcast, Bitcast2);
14383
14384 // Bitcast has the second float (in memory-layout order) and Bitcast2
14385 // has the first one.
14386
14387 SDValue BasePtr = LD->getBasePtr();
14388 if (LD->isIndexed()) {
14389 assert(LD->getAddressingMode() == ISD::PRE_INC &&
14390 "Non-pre-inc AM on PPC?");
14391 BasePtr =
14392 DAG.getNode(ISD::ADD, dl, BasePtr.getValueType(), BasePtr,
14393 LD->getOffset());
14394 }
14395
14396 auto MMOFlags =
14397 LD->getMemOperand()->getFlags() & ~MachineMemOperand::MOVolatile;
14398 SDValue FloatLoad = DAG.getLoad(MVT::f32, dl, LD->getChain(), BasePtr,
14399 LD->getPointerInfo(), LD->getAlignment(),
14400 MMOFlags, LD->getAAInfo());
14401 SDValue AddPtr =
14402 DAG.getNode(ISD::ADD, dl, BasePtr.getValueType(),
14403 BasePtr, DAG.getIntPtrConstant(4, dl));
14404 SDValue FloatLoad2 = DAG.getLoad(
14405 MVT::f32, dl, SDValue(FloatLoad.getNode(), 1), AddPtr,
14406 LD->getPointerInfo().getWithOffset(4),
14407 MinAlign(LD->getAlignment(), 4), MMOFlags, LD->getAAInfo());
14408
14409 if (LD->isIndexed()) {
14410 // Note that DAGCombine should re-form any pre-increment load(s) from
14411 // what is produced here if that makes sense.
14412 DAG.ReplaceAllUsesOfValueWith(SDValue(LD, 1), BasePtr);
14413 }
14414
14415 DCI.CombineTo(Bitcast2, FloatLoad);
14416 DCI.CombineTo(Bitcast, FloatLoad2);
14417
14418 DAG.ReplaceAllUsesOfValueWith(SDValue(LD, LD->isIndexed() ? 2 : 1),
14419 SDValue(FloatLoad2.getNode(), 1));
14420 return true;
14421 };
14422
14423 if (ReplaceTwoFloatLoad())
14424 return SDValue(N, 0);
14425
14426 EVT MemVT = LD->getMemoryVT();
14427 Type *Ty = MemVT.getTypeForEVT(*DAG.getContext());
14428 Align ABIAlignment = DAG.getDataLayout().getABITypeAlign(Ty);
14429 if (LD->isUnindexed() && VT.isVector() &&
14430 ((Subtarget.hasAltivec() && ISD::isNON_EXTLoad(N) &&
14431 // P8 and later hardware should just use LOAD.
14432 !Subtarget.hasP8Vector() &&
14433 (VT == MVT::v16i8 || VT == MVT::v8i16 || VT == MVT::v4i32 ||
14434 VT == MVT::v4f32))) &&
14435 LD->getAlign() < ABIAlignment) {
14436 // This is a type-legal unaligned Altivec load.
14437 SDValue Chain = LD->getChain();
14438 SDValue Ptr = LD->getBasePtr();
14439 bool isLittleEndian = Subtarget.isLittleEndian();
14440
14441 // This implements the loading of unaligned vectors as described in
14442 // the venerable Apple Velocity Engine overview. Specifically:
14443 // https://developer.apple.com/hardwaredrivers/ve/alignment.html
14444 // https://developer.apple.com/hardwaredrivers/ve/code_optimization.html
14445 //
14446 // The general idea is to expand a sequence of one or more unaligned
14447 // loads into an alignment-based permutation-control instruction (lvsl
14448 // or lvsr), a series of regular vector loads (which always truncate
14449 // their input address to an aligned address), and a series of
14450 // permutations. The results of these permutations are the requested
14451 // loaded values. The trick is that the last "extra" load is not taken
14452 // from the address you might suspect (sizeof(vector) bytes after the
14453 // last requested load), but rather sizeof(vector) - 1 bytes after the
14454 // last requested vector. The point of this is to avoid a page fault if
14455 // the base address happened to be aligned. This works because if the
14456 // base address is aligned, then adding less than a full vector length
14457 // will cause the last vector in the sequence to be (re)loaded.
 14458 // Otherwise, the next vector will be fetched from the address you would
 14459 // expect.
14460
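 // A hedged sketch of the resulting instruction sequence for one v4i32
 // load from a possibly-unaligned pointer P (not part of the original
 // source; register names are illustrative):
 // lvsl vCtl, 0, rP ; permute control from the low bits of P
 // lvx vLo, 0, rP ; loads from P rounded down to 16 bytes
 // lvx vHi, rP, rInc ; rInc = 15 or 16, per the trick above
 // vperm vDst, vLo, vHi, vCtl
 // On little-endian subtargets lvsr is used and the vperm inputs are
 // swapped, as the code below arranges.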
14461 // We might be able to reuse the permutation generation from
14462 // a different base address offset from this one by an aligned amount.
14463 // The INTRINSIC_WO_CHAIN DAG combine will attempt to perform this
14464 // optimization later.
14465 Intrinsic::ID Intr, IntrLD, IntrPerm;
14466 MVT PermCntlTy, PermTy, LDTy;
14467 Intr = isLittleEndian ? Intrinsic::ppc_altivec_lvsr
14468 : Intrinsic::ppc_altivec_lvsl;
14469 IntrLD = Intrinsic::ppc_altivec_lvx;
14470 IntrPerm = Intrinsic::ppc_altivec_vperm;
14471 PermCntlTy = MVT::v16i8;
14472 PermTy = MVT::v4i32;
14473 LDTy = MVT::v4i32;
14474
14475 SDValue PermCntl = BuildIntrinsicOp(Intr, Ptr, DAG, dl, PermCntlTy);
14476
14477 // Create the new MMO for the new base load. It is like the original MMO,
14478 // but represents an area in memory almost twice the vector size centered
14479 // on the original address. If the address is unaligned, we might start
14480 // reading up to (sizeof(vector)-1) bytes below the address of the
14481 // original unaligned load.
 14482 MachineFunction &MF = DAG.getMachineFunction();
 14483 MachineMemOperand *BaseMMO =
14484 MF.getMachineMemOperand(LD->getMemOperand(),
14485 -(long)MemVT.getStoreSize()+1,
14486 2*MemVT.getStoreSize()-1);
14487
14488 // Create the new base load.
14489 SDValue LDXIntID =
14490 DAG.getTargetConstant(IntrLD, dl, getPointerTy(MF.getDataLayout()));
14491 SDValue BaseLoadOps[] = { Chain, LDXIntID, Ptr };
 14492 SDValue BaseLoad =
 14493 DAG.getMemIntrinsicNode(ISD::INTRINSIC_W_CHAIN, dl,
14494 DAG.getVTList(PermTy, MVT::Other),
14495 BaseLoadOps, LDTy, BaseMMO);
14496
14497 // Note that the value of IncOffset (which is provided to the next
14498 // load's pointer info offset value, and thus used to calculate the
14499 // alignment), and the value of IncValue (which is actually used to
14500 // increment the pointer value) are different! This is because we
14501 // require the next load to appear to be aligned, even though it
14502 // is actually offset from the base pointer by a lesser amount.
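 // Concretely (an illustrative note, not original source text): for a
 // 16-byte vector, IncOffset is 16 so the second load appears 16-byte
 // aligned, while IncValue may be reduced to 15 by the page-fault trick
 // described above.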
14503 int IncOffset = VT.getSizeInBits() / 8;
14504 int IncValue = IncOffset;
14505
14506 // Walk (both up and down) the chain looking for another load at the real
14507 // (aligned) offset (the alignment of the other load does not matter in
14508 // this case). If found, then do not use the offset reduction trick, as
14509 // that will prevent the loads from being later combined (as they would
14510 // otherwise be duplicates).
14511 if (!findConsecutiveLoad(LD, DAG))
14512 --IncValue;
14513
14514 SDValue Increment =
14515 DAG.getConstant(IncValue, dl, getPointerTy(MF.getDataLayout()));
14516 Ptr = DAG.getNode(ISD::ADD, dl, Ptr.getValueType(), Ptr, Increment);
14517
14518 MachineMemOperand *ExtraMMO =
14519 MF.getMachineMemOperand(LD->getMemOperand(),
14520 1, 2*MemVT.getStoreSize()-1);
14521 SDValue ExtraLoadOps[] = { Chain, LDXIntID, Ptr };
 14522 SDValue ExtraLoad =
 14523 DAG.getMemIntrinsicNode(ISD::INTRINSIC_W_CHAIN, dl,
14524 DAG.getVTList(PermTy, MVT::Other),
14525 ExtraLoadOps, LDTy, ExtraMMO);
14526
 14527 SDValue TF = DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
 14528 BaseLoad.getValue(1), ExtraLoad.getValue(1));
14529
14530 // Because vperm has a big-endian bias, we must reverse the order
14531 // of the input vectors and complement the permute control vector
14532 // when generating little endian code. We have already handled the
14533 // latter by using lvsr instead of lvsl, so just reverse BaseLoad
14534 // and ExtraLoad here.
14535 SDValue Perm;
14536 if (isLittleEndian)
14537 Perm = BuildIntrinsicOp(IntrPerm,
14538 ExtraLoad, BaseLoad, PermCntl, DAG, dl);
14539 else
14540 Perm = BuildIntrinsicOp(IntrPerm,
14541 BaseLoad, ExtraLoad, PermCntl, DAG, dl);
14542
14543 if (VT != PermTy)
14544 Perm = Subtarget.hasAltivec()
14545 ? DAG.getNode(ISD::BITCAST, dl, VT, Perm)
14546 : DAG.getNode(ISD::FP_ROUND, dl, VT, Perm,
14547 DAG.getTargetConstant(1, dl, MVT::i64));
14548 // second argument is 1 because this rounding
14549 // is always exact.
14550
14551 // The output of the permutation is our loaded result, the TokenFactor is
14552 // our new chain.
14553 DCI.CombineTo(N, Perm, TF);
14554 return SDValue(N, 0);
14555 }
14556 }
14557 break;
 14558 case ISD::INTRINSIC_WO_CHAIN: {
 14559 bool isLittleEndian = Subtarget.isLittleEndian();
14560 unsigned IID = cast<ConstantSDNode>(N->getOperand(0))->getZExtValue();
14561 Intrinsic::ID Intr = (isLittleEndian ? Intrinsic::ppc_altivec_lvsr
14562 : Intrinsic::ppc_altivec_lvsl);
14563 if (IID == Intr && N->getOperand(1)->getOpcode() == ISD::ADD) {
14564 SDValue Add = N->getOperand(1);
14565
14566 int Bits = 4 /* 16 byte alignment */;
14567
14568 if (DAG.MaskedValueIsZero(Add->getOperand(1),
14569 APInt::getAllOnesValue(Bits /* alignment */)
14570 .zext(Add.getScalarValueSizeInBits()))) {
14571 SDNode *BasePtr = Add->getOperand(0).getNode();
14572 for (SDNode::use_iterator UI = BasePtr->use_begin(),
14573 UE = BasePtr->use_end();
14574 UI != UE; ++UI) {
14575 if (UI->getOpcode() == ISD::INTRINSIC_WO_CHAIN &&
14576 cast<ConstantSDNode>(UI->getOperand(0))->getZExtValue() ==
14577 IID) {
14578 // We've found another LVSL/LVSR, and this address is an aligned
14579 // multiple of that one. The results will be the same, so use the
14580 // one we've just found instead.
14581
14582 return SDValue(*UI, 0);
14583 }
14584 }
14585 }
14586
14587 if (isa<ConstantSDNode>(Add->getOperand(1))) {
14588 SDNode *BasePtr = Add->getOperand(0).getNode();
14589 for (SDNode::use_iterator UI = BasePtr->use_begin(),
14590 UE = BasePtr->use_end(); UI != UE; ++UI) {
14591 if (UI->getOpcode() == ISD::ADD &&
14592 isa<ConstantSDNode>(UI->getOperand(1)) &&
14593 (cast<ConstantSDNode>(Add->getOperand(1))->getZExtValue() -
14594 cast<ConstantSDNode>(UI->getOperand(1))->getZExtValue()) %
14595 (1ULL << Bits) == 0) {
14596 SDNode *OtherAdd = *UI;
14597 for (SDNode::use_iterator VI = OtherAdd->use_begin(),
14598 VE = OtherAdd->use_end(); VI != VE; ++VI) {
14599 if (VI->getOpcode() == ISD::INTRINSIC_WO_CHAIN &&
14600 cast<ConstantSDNode>(VI->getOperand(0))->getZExtValue() == IID) {
14601 return SDValue(*VI, 0);
14602 }
14603 }
14604 }
14605 }
14606 }
14607 }
14608
14609 // Combine vmaxsw/h/b(a, a's negation) to abs(a)
 14610 // Expose the vabsduw/h/b opportunity for downstream passes.
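 // For example (illustrative only): vmaxsw(x, sub(0, x)) computes |x| in
 // each lane, so it can be rewritten as ISD::ABS x, which later lowering
 // can fold into vabsduw and friends.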
14611 if (!DCI.isAfterLegalizeDAG() && Subtarget.hasP9Altivec() &&
14612 (IID == Intrinsic::ppc_altivec_vmaxsw ||
14613 IID == Intrinsic::ppc_altivec_vmaxsh ||
14614 IID == Intrinsic::ppc_altivec_vmaxsb)) {
14615 SDValue V1 = N->getOperand(1);
14616 SDValue V2 = N->getOperand(2);
14617 if ((V1.getSimpleValueType() == MVT::v4i32 ||
 14618 V1.getSimpleValueType() == MVT::v8i16 ||
 14619 V1.getSimpleValueType() == MVT::v16i8) &&
14620 V1.getSimpleValueType() == V2.getSimpleValueType()) {
14621 // (0-a, a)
14622 if (V1.getOpcode() == ISD::SUB &&
 14623 ISD::isBuildVectorAllZeros(V1.getOperand(0).getNode()) &&
 14624 V1.getOperand(1) == V2) {
14625 return DAG.getNode(ISD::ABS, dl, V2.getValueType(), V2);
14626 }
14627 // (a, 0-a)
14628 if (V2.getOpcode() == ISD::SUB &&
14629 ISD::isBuildVectorAllZeros(V2.getOperand(0).getNode()) &&
14630 V2.getOperand(1) == V1) {
14631 return DAG.getNode(ISD::ABS, dl, V1.getValueType(), V1);
14632 }
14633 // (x-y, y-x)
14634 if (V1.getOpcode() == ISD::SUB && V2.getOpcode() == ISD::SUB &&
14635 V1.getOperand(0) == V2.getOperand(1) &&
14636 V1.getOperand(1) == V2.getOperand(0)) {
14637 return DAG.getNode(ISD::ABS, dl, V1.getValueType(), V1);
14638 }
14639 }
14640 }
14641 }
14642
14643 break;
 14644 case ISD::INTRINSIC_W_CHAIN:
 14645 // For little endian, VSX loads require generating lxvd2x/xxswapd.
14646 // Not needed on ISA 3.0 based CPUs since we have a non-permuting load.
14647 if (Subtarget.needsSwapsForVSXMemOps()) {
14648 switch (cast<ConstantSDNode>(N->getOperand(1))->getZExtValue()) {
14649 default:
14650 break;
14651 case Intrinsic::ppc_vsx_lxvw4x:
14652 case Intrinsic::ppc_vsx_lxvd2x:
14653 return expandVSXLoadForLE(N, DCI);
14654 }
14655 }
14656 break;
 14657 case ISD::INTRINSIC_VOID:
 14658 // For little endian, VSX stores require generating xxswapd/stxvd2x.
14659 // Not needed on ISA 3.0 based CPUs since we have a non-permuting store.
14660 if (Subtarget.needsSwapsForVSXMemOps()) {
14661 switch (cast<ConstantSDNode>(N->getOperand(1))->getZExtValue()) {
14662 default:
14663 break;
14664 case Intrinsic::ppc_vsx_stxvw4x:
14665 case Intrinsic::ppc_vsx_stxvd2x:
14666 return expandVSXStoreForLE(N, DCI);
14667 }
14668 }
14669 break;
14670 case ISD::BSWAP:
14671 // Turn BSWAP (LOAD) -> lhbrx/lwbrx.
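 // Illustrative DAG (a sketch, assuming an i32 non-extending load):
 // t1: i32,ch = load t0, t2, undef
 // t3: i32 = bswap t1
 // is rewritten below into a single byte-reversing load:
 // t4: i32,ch = PPCISD::LBRX t0, t2, ValueType:i32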
14672 if (ISD::isNON_EXTLoad(N->getOperand(0).getNode()) &&
14673 N->getOperand(0).hasOneUse() &&
14674 (N->getValueType(0) == MVT::i32 || N->getValueType(0) == MVT::i16 ||
14675 (Subtarget.hasLDBRX() && Subtarget.isPPC64() &&
14676 N->getValueType(0) == MVT::i64))) {
14677 SDValue Load = N->getOperand(0);
14678 LoadSDNode *LD = cast<LoadSDNode>(Load);
14679 // Create the byte-swapping load.
14680 SDValue Ops[] = {
14681 LD->getChain(), // Chain
14682 LD->getBasePtr(), // Ptr
14683 DAG.getValueType(N->getValueType(0)) // VT
14684 };
14685 SDValue BSLoad =
 14686 DAG.getMemIntrinsicNode(PPCISD::LBRX, dl,
 14687 DAG.getVTList(N->getValueType(0) == MVT::i64 ?
 14688 MVT::i64 : MVT::i32, MVT::Other),
14689 Ops, LD->getMemoryVT(), LD->getMemOperand());
14690
14691 // If this is an i16 load, insert the truncate.
14692 SDValue ResVal = BSLoad;
14693 if (N->getValueType(0) == MVT::i16)
14694 ResVal = DAG.getNode(ISD::TRUNCATE, dl, MVT::i16, BSLoad);
14695
14696 // First, combine the bswap away. This makes the value produced by the
14697 // load dead.
14698 DCI.CombineTo(N, ResVal);
14699
14700 // Next, combine the load away, we give it a bogus result value but a real
14701 // chain result. The result value is dead because the bswap is dead.
14702 DCI.CombineTo(Load.getNode(), ResVal, BSLoad.getValue(1));
14703
14704 // Return N so it doesn't get rechecked!
14705 return SDValue(N, 0);
14706 }
14707 break;
14708 case PPCISD::VCMP:
14709 // If a VCMP_rec node already exists with exactly the same operands as this
14710 // node, use its result instead of this node (VCMP_rec computes both a CR6
14711 // and a normal output).
14712 //
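 // Hedged illustration: if the DAG already contains
 // tA: v4i32,glue = PPCISD::VCMP_rec t1, t2, Constant:i32<Opc>
 // then a matching "v4i32 = PPCISD::VCMP t1, t2, Constant:i32<Opc>" can
 // simply reuse tA's value result instead of issuing a second compare.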
14713 if (!N->getOperand(0).hasOneUse() &&
14714 !N->getOperand(1).hasOneUse() &&
14715 !N->getOperand(2).hasOneUse()) {
14716
14717 // Scan all of the users of the LHS, looking for VCMP_rec's that match.
14718 SDNode *VCMPrecNode = nullptr;
14719
14720 SDNode *LHSN = N->getOperand(0).getNode();
14721 for (SDNode::use_iterator UI = LHSN->use_begin(), E = LHSN->use_end();
14722 UI != E; ++UI)
14723 if (UI->getOpcode() == PPCISD::VCMP_rec &&
14724 UI->getOperand(1) == N->getOperand(1) &&
14725 UI->getOperand(2) == N->getOperand(2) &&
14726 UI->getOperand(0) == N->getOperand(0)) {
14727 VCMPrecNode = *UI;
14728 break;
14729 }
14730
14731 // If there is no VCMP_rec node, or if the flag value has a single use,
14732 // don't transform this.
14733 if (!VCMPrecNode || VCMPrecNode->hasNUsesOfValue(0, 1))
14734 break;
14735
14736 // Look at the (necessarily single) use of the flag value. If it has a
14737 // chain, this transformation is more complex. Note that multiple things
14738 // could use the value result, which we should ignore.
14739 SDNode *FlagUser = nullptr;
14740 for (SDNode::use_iterator UI = VCMPrecNode->use_begin();
14741 FlagUser == nullptr; ++UI) {
14742 assert(UI != VCMPrecNode->use_end() && "Didn't find user!");
14743 SDNode *User = *UI;
14744 for (unsigned i = 0, e = User->getNumOperands(); i != e; ++i) {
14745 if (User->getOperand(i) == SDValue(VCMPrecNode, 1)) {
14746 FlagUser = User;
14747 break;
14748 }
14749 }
14750 }
14751
14752 // If the user is a MFOCRF instruction, we know this is safe.
14753 // Otherwise we give up for right now.
14754 if (FlagUser->getOpcode() == PPCISD::MFOCRF)
14755 return SDValue(VCMPrecNode, 0);
14756 }
14757 break;
14758 case ISD::BRCOND: {
14759 SDValue Cond = N->getOperand(1);
14760 SDValue Target = N->getOperand(2);
14761
14762 if (Cond.getOpcode() == ISD::INTRINSIC_W_CHAIN &&
 14763 cast<ConstantSDNode>(Cond.getOperand(1))->getZExtValue() ==
 14764 Intrinsic::loop_decrement) {
14765
14766 // We now need to make the intrinsic dead (it cannot be instruction
14767 // selected).
14768 DAG.ReplaceAllUsesOfValueWith(Cond.getValue(1), Cond.getOperand(0));
14769 assert(Cond.getNode()->hasOneUse() &&
14770 "Counter decrement has more than one use");
14771
14772 return DAG.getNode(PPCISD::BDNZ, dl, MVT::Other,
14773 N->getOperand(0), Target);
14774 }
14775 }
14776 break;
14777 case ISD::BR_CC: {
14778 // If this is a branch on an altivec predicate comparison, lower this so
14779 // that we don't have to do a MFOCRF: instead, branch directly on CR6. This
14780 // lowering is done pre-legalize, because the legalizer lowers the predicate
14781 // compare down to code that is difficult to reassemble.
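 // Typical input (an illustrative sketch): a branch on a predicate
 // compare such as vec_all_eq(a, b), i.e. the vcmpequw. dot form, becomes
 // a COND_BRANCH on the EQ bit of CR6 below instead of an MFOCRF plus an
 // integer compare and branch.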
14782 ISD::CondCode CC = cast<CondCodeSDNode>(N->getOperand(1))->get();
14783 SDValue LHS = N->getOperand(2), RHS = N->getOperand(3);
14784
14785 // Sometimes the promoted value of the intrinsic is ANDed by some non-zero
14786 // value. If so, pass-through the AND to get to the intrinsic.
14787 if (LHS.getOpcode() == ISD::AND &&
14788 LHS.getOperand(0).getOpcode() == ISD::INTRINSIC_W_CHAIN &&
 14789 cast<ConstantSDNode>(LHS.getOperand(0).getOperand(1))->getZExtValue() ==
 14790 Intrinsic::loop_decrement &&
14791 isa<ConstantSDNode>(LHS.getOperand(1)) &&
14792 !isNullConstant(LHS.getOperand(1)))
14793 LHS = LHS.getOperand(0);
14794
14795 if (LHS.getOpcode() == ISD::INTRINSIC_W_CHAIN &&
 14796 cast<ConstantSDNode>(LHS.getOperand(1))->getZExtValue() ==
 14797 Intrinsic::loop_decrement &&
14798 isa<ConstantSDNode>(RHS)) {
14799 assert((CC == ISD::SETEQ || CC == ISD::SETNE) &&
14800 "Counter decrement comparison is not EQ or NE");
14801
14802 unsigned Val = cast<ConstantSDNode>(RHS)->getZExtValue();
14803 bool isBDNZ = (CC == ISD::SETEQ && Val) ||
14804 (CC == ISD::SETNE && !Val);
14805
14806 // We now need to make the intrinsic dead (it cannot be instruction
14807 // selected).
14808 DAG.ReplaceAllUsesOfValueWith(LHS.getValue(1), LHS.getOperand(0));
14809 assert(LHS.getNode()->hasOneUse() &&
14810 "Counter decrement has more than one use");
14811
14812 return DAG.getNode(isBDNZ ? PPCISD::BDNZ : PPCISD::BDZ, dl, MVT::Other,
14813 N->getOperand(0), N->getOperand(4));
14814 }
14815
14816 int CompareOpc;
14817 bool isDot;
14818
14819 if (LHS.getOpcode() == ISD::INTRINSIC_WO_CHAIN &&
14820 isa<ConstantSDNode>(RHS) && (CC == ISD::SETEQ || CC == ISD::SETNE) &&
14821 getVectorCompareInfo(LHS, CompareOpc, isDot, Subtarget)) {
14822 assert(isDot && "Can't compare against a vector result!");
14823
14824 // If this is a comparison against something other than 0/1, then we know
14825 // that the condition is never/always true.
14826 unsigned Val = cast<ConstantSDNode>(RHS)->getZExtValue();
14827 if (Val != 0 && Val != 1) {
14828 if (CC == ISD::SETEQ) // Cond never true, remove branch.
14829 return N->getOperand(0);
14830 // Always !=, turn it into an unconditional branch.
14831 return DAG.getNode(ISD::BR, dl, MVT::Other,
14832 N->getOperand(0), N->getOperand(4));
14833 }
14834
14835 bool BranchOnWhenPredTrue = (CC == ISD::SETEQ) ^ (Val == 0);
14836
14837 // Create the PPCISD altivec 'dot' comparison node.
14838 SDValue Ops[] = {
14839 LHS.getOperand(2), // LHS of compare
14840 LHS.getOperand(3), // RHS of compare
14841 DAG.getConstant(CompareOpc, dl, MVT::i32)
14842 };
14843 EVT VTs[] = { LHS.getOperand(2).getValueType(), MVT::Glue };
14844 SDValue CompNode = DAG.getNode(PPCISD::VCMP_rec, dl, VTs, Ops);
14845
14846 // Unpack the result based on how the target uses it.
14847 PPC::Predicate CompOpc;
14848 switch (cast<ConstantSDNode>(LHS.getOperand(1))->getZExtValue()) {
14849 default: // Can't happen, don't crash on invalid number though.
14850 case 0: // Branch on the value of the EQ bit of CR6.
14851 CompOpc = BranchOnWhenPredTrue ? PPC::PRED_EQ : PPC::PRED_NE;
14852 break;
14853 case 1: // Branch on the inverted value of the EQ bit of CR6.
14854 CompOpc = BranchOnWhenPredTrue ? PPC::PRED_NE : PPC::PRED_EQ;
14855 break;
14856 case 2: // Branch on the value of the LT bit of CR6.
14857 CompOpc = BranchOnWhenPredTrue ? PPC::PRED_LT : PPC::PRED_GE;
14858 break;
14859 case 3: // Branch on the inverted value of the LT bit of CR6.
14860 CompOpc = BranchOnWhenPredTrue ? PPC::PRED_GE : PPC::PRED_LT;
14861 break;
14862 }
14863
14864 return DAG.getNode(PPCISD::COND_BRANCH, dl, MVT::Other, N->getOperand(0),
14865 DAG.getConstant(CompOpc, dl, MVT::i32),
14866 DAG.getRegister(PPC::CR6, MVT::i32),
14867 N->getOperand(4), CompNode.getValue(1));
14868 }
14869 break;
14870 }
14871 case ISD::BUILD_VECTOR:
14872 return DAGCombineBuildVector(N, DCI);
14873 case ISD::ABS:
14874 return combineABS(N, DCI);
14875 case ISD::VSELECT:
14876 return combineVSelect(N, DCI);
14877 }
14878
14879 return SDValue();
14880}
14881
14882SDValue
14883PPCTargetLowering::BuildSDIVPow2(SDNode *N, const APInt &Divisor,
 14884 SelectionDAG &DAG,
14885 SmallVectorImpl<SDNode *> &Created) const {
14886 // fold (sdiv X, pow2)
14887 EVT VT = N->getValueType(0);
14888 if (VT == MVT::i64 && !Subtarget.isPPC64())
14889 return SDValue();
14890 if ((VT != MVT::i32 && VT != MVT::i64) ||
14891 !(Divisor.isPowerOf2() || (-Divisor).isPowerOf2()))
14892 return SDValue();
14893
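 // A hedged sketch of the expansion built below: for "X sdiv 4" on i32,
 // the PPCISD::SRA_ADDZE node corresponds to the classic sequence
 // srawi r4, r3, 2 ; shift; CA is set if X was negative and bits were lost
 // addze r3, r4 ; add the carry to round the quotient toward zero
 // For a negative power of two, the result is then negated below.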
14894 SDLoc DL(N);
14895 SDValue N0 = N->getOperand(0);
14896
14897 bool IsNegPow2 = (-Divisor).isPowerOf2();
14898 unsigned Lg2 = (IsNegPow2 ? -Divisor : Divisor).countTrailingZeros();
14899 SDValue ShiftAmt = DAG.getConstant(Lg2, DL, VT);
14900
14901 SDValue Op = DAG.getNode(PPCISD::SRA_ADDZE, DL, VT, N0, ShiftAmt);
14902 Created.push_back(Op.getNode());
14903
14904 if (IsNegPow2) {
14905 Op = DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, DL, VT), Op);
14906 Created.push_back(Op.getNode());
14907 }
14908
14909 return Op;
14910}
14911
14912//===----------------------------------------------------------------------===//
14913// Inline Assembly Support
14914//===----------------------------------------------------------------------===//
14915
14916void PPCTargetLowering::computeKnownBitsForTargetNode(const SDValue Op,
 14917 KnownBits &Known,
14918 const APInt &DemandedElts,
14919 const SelectionDAG &DAG,
14920 unsigned Depth) const {
14921 Known.resetAll();
14922 switch (Op.getOpcode()) {
14923 default: break;
14924 case PPCISD::LBRX: {
14925 // lhbrx is known to have the top bits cleared out.
14926 if (cast<VTSDNode>(Op.getOperand(2))->getVT() == MVT::i16)
14927 Known.Zero = 0xFFFF0000;
14928 break;
14929 }
 14930 case ISD::INTRINSIC_WO_CHAIN: {
 14931 switch (cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue()) {
14932 default: break;
14933 case Intrinsic::ppc_altivec_vcmpbfp_p:
14934 case Intrinsic::ppc_altivec_vcmpeqfp_p:
14935 case Intrinsic::ppc_altivec_vcmpequb_p:
14936 case Intrinsic::ppc_altivec_vcmpequh_p:
14937 case Intrinsic::ppc_altivec_vcmpequw_p:
14938 case Intrinsic::ppc_altivec_vcmpequd_p:
14939 case Intrinsic::ppc_altivec_vcmpequq_p:
14940 case Intrinsic::ppc_altivec_vcmpgefp_p:
14941 case Intrinsic::ppc_altivec_vcmpgtfp_p:
14942 case Intrinsic::ppc_altivec_vcmpgtsb_p:
14943 case Intrinsic::ppc_altivec_vcmpgtsh_p:
14944 case Intrinsic::ppc_altivec_vcmpgtsw_p:
14945 case Intrinsic::ppc_altivec_vcmpgtsd_p:
14946 case Intrinsic::ppc_altivec_vcmpgtsq_p:
14947 case Intrinsic::ppc_altivec_vcmpgtub_p:
14948 case Intrinsic::ppc_altivec_vcmpgtuh_p:
14949 case Intrinsic::ppc_altivec_vcmpgtuw_p:
14950 case Intrinsic::ppc_altivec_vcmpgtud_p:
14951 case Intrinsic::ppc_altivec_vcmpgtuq_p:
14952 Known.Zero = ~1U; // All bits but the low one are known to be zero.
14953 break;
14954 }
14955 }
14956 }
14957}
14958
14959Align PPCTargetLowering::getPrefLoopAlignment(MachineLoop *ML) const {
 14960 switch (Subtarget.getCPUDirective()) {
14961 default: break;
14962 case PPC::DIR_970:
14963 case PPC::DIR_PWR4:
14964 case PPC::DIR_PWR5:
14965 case PPC::DIR_PWR5X:
14966 case PPC::DIR_PWR6:
14967 case PPC::DIR_PWR6X:
14968 case PPC::DIR_PWR7:
14969 case PPC::DIR_PWR8:
14970 case PPC::DIR_PWR9:
14971 case PPC::DIR_PWR10:
14972 case PPC::DIR_PWR_FUTURE: {
14973 if (!ML)
14974 break;
14975
 14976 if (!DisableInnermostLoopAlign32) {
 14977 // If the nested loop is an innermost loop, prefer a 32-byte alignment,
14978 // so that we can decrease cache misses and branch-prediction misses.
14979 // Actual alignment of the loop will depend on the hotness check and other
14980 // logic in alignBlocks.
14981 if (ML->getLoopDepth() > 1 && ML->getSubLoops().empty())
14982 return Align(32);
14983 }
14984
14985 const PPCInstrInfo *TII = Subtarget.getInstrInfo();
14986
14987 // For small loops (between 5 and 8 instructions), align to a 32-byte
14988 // boundary so that the entire loop fits in one instruction-cache line.
14989 uint64_t LoopSize = 0;
14990 for (auto I = ML->block_begin(), IE = ML->block_end(); I != IE; ++I)
14991 for (auto J = (*I)->begin(), JE = (*I)->end(); J != JE; ++J) {
14992 LoopSize += TII->getInstSizeInBytes(*J);
14993 if (LoopSize > 32)
14994 break;
14995 }
14996
14997 if (LoopSize > 16 && LoopSize <= 32)
14998 return Align(32);
14999
15000 break;
15001 }
15002 }
15003
 15004 return TargetLowering::getPrefLoopAlignment(ML);
15005}
15006
15007/// getConstraintType - Given a constraint, return the type of
15008/// constraint it is for this target.
15009PPCTargetLowering::ConstraintType
15010PPCTargetLowering::getConstraintType(StringRef Constraint) const {
 15011 if (Constraint.size() == 1) {
15012 switch (Constraint[0]) {
15013 default: break;
15014 case 'b':
15015 case 'r':
15016 case 'f':
15017 case 'd':
15018 case 'v':
15019 case 'y':
15020 return C_RegisterClass;
15021 case 'Z':
15022 // FIXME: While Z does indicate a memory constraint, it specifically
15023 // indicates an r+r address (used in conjunction with the 'y' modifier
15024 // in the replacement string). Currently, we're forcing the base
15025 // register to be r0 in the asm printer (which is interpreted as zero)
15026 // and forming the complete address in the second register. This is
15027 // suboptimal.
15028 return C_Memory;
15029 }
15030 } else if (Constraint == "wc") { // individual CR bits.
15031 return C_RegisterClass;
15032 } else if (Constraint == "wa" || Constraint == "wd" ||
15033 Constraint == "wf" || Constraint == "ws" ||
15034 Constraint == "wi" || Constraint == "ww") {
15035 return C_RegisterClass; // VSX registers.
15036 }
15037 return TargetLowering::getConstraintType(Constraint);
15038}
15039
15040/// Examine constraint type and operand type and determine a weight value.
15041/// This object must already have been set up with the operand type
15042/// and the current alternative constraint selected.
15043TargetLowering::ConstraintWeight
15044PPCTargetLowering::getSingleConstraintMatchWeight(
 15045 AsmOperandInfo &info, const char *constraint) const {
 15046 ConstraintWeight weight = CW_Invalid;
15047 Value *CallOperandVal = info.CallOperandVal;
15048 // If we don't have a value, we can't do a match,
15049 // but allow it at the lowest weight.
15050 if (!CallOperandVal)
15051 return CW_Default;
15052 Type *type = CallOperandVal->getType();
15053
15054 // Look at the constraint type.
15055 if (StringRef(constraint) == "wc" && type->isIntegerTy(1))
15056 return CW_Register; // an individual CR bit.
15057 else if ((StringRef(constraint) == "wa" ||
15058 StringRef(constraint) == "wd" ||
15059 StringRef(constraint) == "wf") &&
15060 type->isVectorTy())
15061 return CW_Register;
15062 else if (StringRef(constraint) == "wi" && type->isIntegerTy(64))
 15063 return CW_Register; // 'wi' registers hold 64-bit integer data.
15064 else if (StringRef(constraint) == "ws" && type->isDoubleTy())
15065 return CW_Register;
15066 else if (StringRef(constraint) == "ww" && type->isFloatTy())
15067 return CW_Register;
15068
15069 switch (*constraint) {
15070 default:
 15071 weight = TargetLowering::getSingleConstraintMatchWeight(info, constraint);
 15072 break;
15073 case 'b':
15074 if (type->isIntegerTy())
15075 weight = CW_Register;
15076 break;
15077 case 'f':
15078 if (type->isFloatTy())
15079 weight = CW_Register;
15080 break;
15081 case 'd':
15082 if (type->isDoubleTy())
15083 weight = CW_Register;
15084 break;
15085 case 'v':
15086 if (type->isVectorTy())
15087 weight = CW_Register;
15088 break;
15089 case 'y':
15090 weight = CW_Register;
15091 break;
15092 case 'Z':
15093 weight = CW_Memory;
15094 break;
15095 }
15096 return weight;
15097}
15098
15099std::pair<unsigned, const TargetRegisterClass *>
15100PPCTargetLowering::getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI,
 15101 StringRef Constraint,
15102 MVT VT) const {
15103 if (Constraint.size() == 1) {
15104 // GCC RS6000 Constraint Letters
15105 switch (Constraint[0]) {
15106 case 'b': // R1-R31
15107 if (VT == MVT::i64 && Subtarget.isPPC64())
15108 return std::make_pair(0U, &PPC::G8RC_NOX0RegClass);
15109 return std::make_pair(0U, &PPC::GPRC_NOR0RegClass);
15110 case 'r': // R0-R31
15111 if (VT == MVT::i64 && Subtarget.isPPC64())
15112 return std::make_pair(0U, &PPC::G8RCRegClass);
15113 return std::make_pair(0U, &PPC::GPRCRegClass);
15114 // 'd' and 'f' constraints are both defined to be "the floating point
15115 // registers", where one is for 32-bit and the other for 64-bit. We don't
15116 // really care overly much here so just give them all the same reg classes.
15117 case 'd':
15118 case 'f':
15119 if (Subtarget.hasSPE()) {
15120 if (VT == MVT::f32 || VT == MVT::i32)
15121 return std::make_pair(0U, &PPC::GPRCRegClass);
15122 if (VT == MVT::f64 || VT == MVT::i64)
15123 return std::make_pair(0U, &PPC::SPERCRegClass);
15124 } else {
15125 if (VT == MVT::f32 || VT == MVT::i32)
15126 return std::make_pair(0U, &PPC::F4RCRegClass);
15127 if (VT == MVT::f64 || VT == MVT::i64)
15128 return std::make_pair(0U, &PPC::F8RCRegClass);
15129 }
15130 break;
15131 case 'v':
15132 if (Subtarget.hasAltivec())
15133 return std::make_pair(0U, &PPC::VRRCRegClass);
15134 break;
15135 case 'y': // crrc
15136 return std::make_pair(0U, &PPC::CRRCRegClass);
15137 }
15138 } else if (Constraint == "wc" && Subtarget.useCRBits()) {
15139 // An individual CR bit.
15140 return std::make_pair(0U, &PPC::CRBITRCRegClass);
15141 } else if ((Constraint == "wa" || Constraint == "wd" ||
15142 Constraint == "wf" || Constraint == "wi") &&
15143 Subtarget.hasVSX()) {
15144 return std::make_pair(0U, &PPC::VSRCRegClass);
15145 } else if ((Constraint == "ws" || Constraint == "ww") && Subtarget.hasVSX()) {
15146 if (VT == MVT::f32 && Subtarget.hasP8Vector())
15147 return std::make_pair(0U, &PPC::VSSRCRegClass);
15148 else
15149 return std::make_pair(0U, &PPC::VSFRCRegClass);
15150 } else if (Constraint == "lr") {
15151 if (VT == MVT::i64)
15152 return std::make_pair(0U, &PPC::LR8RCRegClass);
15153 else
15154 return std::make_pair(0U, &PPC::LRRCRegClass);
15155 }
15156
15157 // Handle special cases of physical registers that are not properly handled
15158 // by the base class.
15159 if (Constraint[0] == '{' && Constraint[Constraint.size() - 1] == '}') {
15160 // If we name a VSX register, we can't defer to the base class because it
15161 // will not recognize the correct register (their names will be VSL{0-31}
15162 // and V{0-31} so they won't match). So we match them here.
15163 if (Constraint.size() > 3 && Constraint[1] == 'v' && Constraint[2] == 's') {
15164 int VSNum = atoi(Constraint.data() + 3);
15165 assert(VSNum >= 0 && VSNum <= 63 &&
15166 "Attempted to access a vsr out of range");
15167 if (VSNum < 32)
15168 return std::make_pair(PPC::VSL0 + VSNum, &PPC::VSRCRegClass);
15169 return std::make_pair(PPC::V0 + VSNum - 32, &PPC::VSRCRegClass);
15170 }
15171
15172 // For float registers, we can't defer to the base class as it will match
15173 // the SPILLTOVSRRC class.
15174 if (Constraint.size() > 3 && Constraint[1] == 'f') {
15175 int RegNum = atoi(Constraint.data() + 2);
15176 if (RegNum > 31 || RegNum < 0)
15177 report_fatal_error("Invalid floating point register number");
15178 if (VT == MVT::f32 || VT == MVT::i32)
15179 return Subtarget.hasSPE()
15180 ? std::make_pair(PPC::R0 + RegNum, &PPC::GPRCRegClass)
15181 : std::make_pair(PPC::F0 + RegNum, &PPC::F4RCRegClass);
15182 if (VT == MVT::f64 || VT == MVT::i64)
15183 return Subtarget.hasSPE()
15184 ? std::make_pair(PPC::S0 + RegNum, &PPC::SPERCRegClass)
15185 : std::make_pair(PPC::F0 + RegNum, &PPC::F8RCRegClass);
15186 }
15187 }
15188
 15189 std::pair<unsigned, const TargetRegisterClass *> R =
 15190 TargetLowering::getRegForInlineAsmConstraint(TRI, Constraint, VT);
15191
15192 // r[0-9]+ are used, on PPC64, to refer to the corresponding 64-bit registers
15193 // (which we call X[0-9]+). If a 64-bit value has been requested, and a
15194 // 32-bit GPR has been selected, then 'upgrade' it to the 64-bit parent
15195 // register.
15196 // FIXME: If TargetLowering::getRegForInlineAsmConstraint could somehow use
15197 // the AsmName field from *RegisterInfo.td, then this would not be necessary.
15198 if (R.first && VT == MVT::i64 && Subtarget.isPPC64() &&
15199 PPC::GPRCRegClass.contains(R.first))
15200 return std::make_pair(TRI->getMatchingSuperReg(R.first,
15201 PPC::sub_32, &PPC::G8RCRegClass),
15202 &PPC::G8RCRegClass);
15203
15204 // GCC accepts 'cc' as an alias for 'cr0', and we need to do the same.
15205 if (!R.second && StringRef("{cc}").equals_lower(Constraint)) {
15206 R.first = PPC::CR0;
15207 R.second = &PPC::CRRCRegClass;
15208 }
15209
15210 return R;
15211}
15212
15213/// LowerAsmOperandForConstraint - Lower the specified operand into the Ops
15214/// vector. If it is invalid, don't add anything to Ops.
15215void PPCTargetLowering::LowerAsmOperandForConstraint(SDValue Op,
 15216 std::string &Constraint,
15217 std::vector<SDValue>&Ops,
15218 SelectionDAG &DAG) const {
15219 SDValue Result;
15220
15221 // Only support length 1 constraints.
15222 if (Constraint.length() > 1) return;
15223
15224 char Letter = Constraint[0];
15225 switch (Letter) {
15226 default: break;
15227 case 'I':
15228 case 'J':
15229 case 'K':
15230 case 'L':
15231 case 'M':
15232 case 'N':
15233 case 'O':
15234 case 'P': {
 15235 ConstantSDNode *CST = dyn_cast<ConstantSDNode>(Op);
 15236 if (!CST) return; // Must be an immediate to match.
15237 SDLoc dl(Op);
15238 int64_t Value = CST->getSExtValue();
15239 EVT TCVT = MVT::i64; // All constants taken to be 64 bits so that negative
15240 // numbers are printed as such.
15241 switch (Letter) {
15242 default: llvm_unreachable("Unknown constraint letter!");
15243 case 'I': // "I" is a signed 16-bit constant.
15244 if (isInt<16>(Value))
15245 Result = DAG.getTargetConstant(Value, dl, TCVT);
15246 break;
15247 case 'J': // "J" is a constant with only the high-order 16 bits nonzero.
 15248 if (isShiftedUInt<16, 16>(Value))
 15249 Result = DAG.getTargetConstant(Value, dl, TCVT);
15250 break;
15251 case 'L': // "L" is a signed 16-bit constant shifted left 16 bits.
 15252 if (isShiftedInt<16, 16>(Value))
 15253 Result = DAG.getTargetConstant(Value, dl, TCVT);
15254 break;
15255 case 'K': // "K" is a constant with only the low-order 16 bits nonzero.
15256 if (isUInt<16>(Value))
15257 Result = DAG.getTargetConstant(Value, dl, TCVT);
15258 break;
15259 case 'M': // "M" is a constant that is greater than 31.
15260 if (Value > 31)
15261 Result = DAG.getTargetConstant(Value, dl, TCVT);
15262 break;
15263 case 'N': // "N" is a positive constant that is an exact power of two.
15264 if (Value > 0 && isPowerOf2_64(Value))
15265 Result = DAG.getTargetConstant(Value, dl, TCVT);
15266 break;
15267 case 'O': // "O" is the constant zero.
15268 if (Value == 0)
15269 Result = DAG.getTargetConstant(Value, dl, TCVT);
15270 break;
15271 case 'P': // "P" is a constant whose negation is a signed 16-bit constant.
15272 if (isInt<16>(-Value))
15273 Result = DAG.getTargetConstant(Value, dl, TCVT);
15274 break;
15275 }
15276 break;
15277 }
15278 }
15279
15280 if (Result.getNode()) {
15281 Ops.push_back(Result);
15282 return;
15283 }
15284
15285 // Handle standard constraint letters.
 15286 TargetLowering::LowerAsmOperandForConstraint(Op, Constraint, Ops, DAG);
15287}
15288
15289// isLegalAddressingMode - Return true if the addressing mode represented
15290// by AM is legal for this target, for a load/store of the specified type.
15291bool PPCTargetLowering::isLegalAddressingMode(const DataLayout &DL,
 15292 const AddrMode &AM, Type *Ty,
15293 unsigned AS,
15294 Instruction *I) const {
15295 // Vector type r+i form is supported since power9 as DQ form. We don't check
 15296 // the offset matching DQ form requirement (off % 16 == 0), because on PowerPC,
15297 // imm form is preferred and the offset can be adjusted to use imm form later
15298 // in pass PPCLoopInstrFormPrep. Also in LSR, for one LSRUse, it uses min and
 15299 // max offset to check legal addressing mode, so we should be a little
 15300 // aggressive in accepting other offsets for that LSRUse.
15301 if (Ty->isVectorTy() && AM.BaseOffs != 0 && !Subtarget.hasP9Vector())
15302 return false;
15303
15304 // PPC allows a sign-extended 16-bit immediate field.
15305 if (AM.BaseOffs <= -(1LL << 16) || AM.BaseOffs >= (1LL << 16)-1)
15306 return false;
15307
15308 // No global is ever allowed as a base.
15309 if (AM.BaseGV)
15310 return false;
15311
 15312 // PPC only supports r+r addressing.
15313 switch (AM.Scale) {
15314 case 0: // "r+i" or just "i", depending on HasBaseReg.
15315 break;
15316 case 1:
15317 if (AM.HasBaseReg && AM.BaseOffs) // "r+r+i" is not allowed.
15318 return false;
15319 // Otherwise we have r+r or r+i.
15320 break;
15321 case 2:
15322 if (AM.HasBaseReg || AM.BaseOffs) // 2*r+r or 2*r+i is not allowed.
15323 return false;
15324 // Allow 2*r as r+r.
15325 break;
15326 default:
15327 // No other scales are supported.
15328 return false;
15329 }
15330
15331 return true;
15332}
15333
15334SDValue PPCTargetLowering::LowerRETURNADDR(SDValue Op,
15335 SelectionDAG &DAG) const {
 15336 MachineFunction &MF = DAG.getMachineFunction();
 15337 MachineFrameInfo &MFI = MF.getFrameInfo();
15338 MFI.setReturnAddressIsTaken(true);
15339
 15340 if (verifyReturnAddressArgumentIsConstant(Op, DAG))
 15341 return SDValue();
15342
15343 SDLoc dl(Op);
15344 unsigned Depth = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
15345
15346 // Make sure the function does not optimize away the store of the RA to
15347 // the stack.
15348 PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>();
15349 FuncInfo->setLRStoreRequired();
15350 bool isPPC64 = Subtarget.isPPC64();
15351 auto PtrVT = getPointerTy(MF.getDataLayout());
15352
15353 if (Depth > 0) {
15354 SDValue FrameAddr = LowerFRAMEADDR(Op, DAG);
15355 SDValue Offset =
15356 DAG.getConstant(Subtarget.getFrameLowering()->getReturnSaveOffset(), dl,
15357 isPPC64 ? MVT::i64 : MVT::i32);
15358 return DAG.getLoad(PtrVT, dl, DAG.getEntryNode(),
 15359 DAG.getNode(ISD::ADD, dl, PtrVT, FrameAddr, Offset),
 15360 MachinePointerInfo());
15361 }
15362
15363 // Just load the return address off the stack.
15364 SDValue RetAddrFI = getReturnAddrFrameIndex(DAG);
 15365 return DAG.getLoad(PtrVT, dl, DAG.getEntryNode(), RetAddrFI,
 15366 MachinePointerInfo());
15367}
15368
15369SDValue PPCTargetLowering::LowerFRAMEADDR(SDValue Op,
15370 SelectionDAG &DAG) const {
15371 SDLoc dl(Op);
15372 unsigned Depth = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
15373
 15374 MachineFunction &MF = DAG.getMachineFunction();
 15375 MachineFrameInfo &MFI = MF.getFrameInfo();
15376 MFI.setFrameAddressIsTaken(true);
15377
15378 EVT PtrVT = getPointerTy(MF.getDataLayout());
15379 bool isPPC64 = PtrVT == MVT::i64;
15380
15381 // Naked functions never have a frame pointer, and so we use r1. For all
15382 // other functions, this decision must be delayed until during PEI.
15383 unsigned FrameReg;
15384 if (MF.getFunction().hasFnAttribute(Attribute::Naked))
15385 FrameReg = isPPC64 ? PPC::X1 : PPC::R1;
15386 else
15387 FrameReg = isPPC64 ? PPC::FP8 : PPC::FP;
15388
15389 SDValue FrameAddr = DAG.getCopyFromReg(DAG.getEntryNode(), dl, FrameReg,
15390 PtrVT);
15391 while (Depth--)
15392 FrameAddr = DAG.getLoad(Op.getValueType(), dl, DAG.getEntryNode(),
15393 FrameAddr, MachinePointerInfo());
15394 return FrameAddr;
15395}
15396
15397// FIXME? Maybe this could be a TableGen attribute on some registers and
15398// this table could be generated automatically from RegInfo.
15399Register PPCTargetLowering::getRegisterByName(const char* RegName, LLT VT,
 15400 const MachineFunction &MF) const {
15401 bool isPPC64 = Subtarget.isPPC64();
15402
15403 bool is64Bit = isPPC64 && VT == LLT::scalar(64);
15404 if (!is64Bit && VT != LLT::scalar(32))
15405 report_fatal_error("Invalid register global variable type");
15406
15408 .Case("r1", is64Bit ? PPC::X1 : PPC::R1)
15409 .Case("r2", isPPC64 ? Register() : PPC::R2)
15410 .Case("r13", (is64Bit ? PPC::X13 : PPC::R13))
15411 .Default(Register());
15412
15413 if (Reg)
15414 return Reg;
15415 report_fatal_error("Invalid register name global variable");
15416}
15417
15418bool PPCTargetLowering::isAccessedAsGotIndirect(SDValue GA) const {
 15419 // The 32-bit SVR4 ABI accesses everything as got-indirect.
15420 if (Subtarget.is32BitELFABI())
15421 return true;
15422
15423 // AIX accesses everything indirectly through the TOC, which is similar to
15424 // the GOT.
15425 if (Subtarget.isAIXABI())
15426 return true;
15427
 15428 CodeModel::Model CModel = getTargetMachine().getCodeModel();
 15429 // If it is small or large code model, module locals are accessed
15430 // indirectly by loading their address from .toc/.got.
15431 if (CModel == CodeModel::Small || CModel == CodeModel::Large)
15432 return true;
15433
15434 // JumpTable and BlockAddress are accessed as got-indirect.
 15435 if (isa<JumpTableSDNode>(GA) || isa<BlockAddressSDNode>(GA))
 15436 return true;
15437
 15438 if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(GA))
 15439 return Subtarget.isGVIndirectSymbol(G->getGlobal());
15440
15441 return false;
15442}
15443
15444bool
15445PPCTargetLowering::isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const {
 15446 // The PowerPC target isn't yet aware of offsets.
15447 return false;
15448}
15449
15450bool PPCTargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info,
 15451 const CallInst &I,
15452 MachineFunction &MF,
15453 unsigned Intrinsic) const {
15454 switch (Intrinsic) {
15455 case Intrinsic::ppc_altivec_lvx:
15456 case Intrinsic::ppc_altivec_lvxl:
15457 case Intrinsic::ppc_altivec_lvebx:
15458 case Intrinsic::ppc_altivec_lvehx:
15459 case Intrinsic::ppc_altivec_lvewx:
15460 case Intrinsic::ppc_vsx_lxvd2x:
15461 case Intrinsic::ppc_vsx_lxvw4x:
15462 case Intrinsic::ppc_vsx_lxvd2x_be:
15463 case Intrinsic::ppc_vsx_lxvw4x_be:
15464 case Intrinsic::ppc_vsx_lxvl:
15465 case Intrinsic::ppc_vsx_lxvll: {
15466 EVT VT;
15467 switch (Intrinsic) {
15468 case Intrinsic::ppc_altivec_lvebx:
15469 VT = MVT::i8;
15470 break;
15471 case Intrinsic::ppc_altivec_lvehx:
15472 VT = MVT::i16;
15473 break;
15474 case Intrinsic::ppc_altivec_lvewx:
15475 VT = MVT::i32;
15476 break;
15477 case Intrinsic::ppc_vsx_lxvd2x:
15478 case Intrinsic::ppc_vsx_lxvd2x_be:
15479 VT = MVT::v2f64;
15480 break;
15481 default:
15482 VT = MVT::v4i32;
15483 break;
15484 }
15485
15486 Info.opc = ISD::INTRINSIC_W_CHAIN;
15487 Info.memVT = VT;
15488 Info.ptrVal = I.getArgOperand(0);
15489 Info.offset = -VT.getStoreSize()+1;
15490 Info.size = 2*VT.getStoreSize()-1;
15491 Info.align = Align(1);
15492 Info.flags = MachineMemOperand::MOLoad;
15493 return true;
15494 }
15495 case Intrinsic::ppc_altivec_stvx:
15496 case Intrinsic::ppc_altivec_stvxl:
15497 case Intrinsic::ppc_altivec_stvebx:
15498 case Intrinsic::ppc_altivec_stvehx:
15499 case Intrinsic::ppc_altivec_stvewx:
15500 case Intrinsic::ppc_vsx_stxvd2x:
15501 case Intrinsic::ppc_vsx_stxvw4x:
15502 case Intrinsic::ppc_vsx_stxvd2x_be:
15503 case Intrinsic::ppc_vsx_stxvw4x_be:
15504 case Intrinsic::ppc_vsx_stxvl:
15505 case Intrinsic::ppc_vsx_stxvll: {
15506 EVT VT;
15507 switch (Intrinsic) {
15508 case Intrinsic::ppc_altivec_stvebx:
15509 VT = MVT::i8;
15510 break;
15511 case Intrinsic::ppc_altivec_stvehx:
15512 VT = MVT::i16;
15513 break;
15514 case Intrinsic::ppc_altivec_stvewx:
15515 VT = MVT::i32;
15516 break;
15517 case Intrinsic::ppc_vsx_stxvd2x:
15518 case Intrinsic::ppc_vsx_stxvd2x_be:
15519 VT = MVT::v2f64;
15520 break;
15521 default:
15522 VT = MVT::v4i32;
15523 break;
15524 }
15525
15526 Info.opc = ISD::INTRINSIC_VOID;
15527 Info.memVT = VT;
15528 Info.ptrVal = I.getArgOperand(1);
15529 Info.offset = -VT.getStoreSize()+1;
15530 Info.size = 2*VT.getStoreSize()-1;
15531 Info.align = Align(1);
15532 Info.flags = MachineMemOperand::MOStore;
15533 return true;
15534 }
15535 default:
15536 break;
15537 }
15538
15539 return false;
15540}
15541
15542/// It returns EVT::Other if the type should be determined using generic
15543/// target-independent logic.
15544EVT PPCTargetLowering::getOptimalMemOpType(
 15545 const MemOp &Op, const AttributeList &FuncAttributes) const {
15546 if (getTargetMachine().getOptLevel() != CodeGenOpt::None) {
15547 // We should use Altivec/VSX loads and stores when available. For unaligned
15548 // addresses, unaligned VSX loads are only fast starting with the P8.
15549 if (Subtarget.hasAltivec() && Op.size() >= 16 &&
15550 (Op.isAligned(Align(16)) ||
15551 ((Op.isMemset() && Subtarget.hasVSX()) || Subtarget.hasP8Vector())))
15552 return MVT::v4i32;
15553 }
15554
15555 if (Subtarget.isPPC64()) {
15556 return MVT::i64;
15557 }
15558
15559 return MVT::i32;
15560}
15561
15562/// Returns true if it is beneficial to convert a load of a constant
15563/// to just the constant itself.
15564bool PPCTargetLowering::shouldConvertConstantLoadToIntImm(const APInt &Imm,
 15565 Type *Ty) const {
15566 assert(Ty->isIntegerTy());
15567
15568 unsigned BitSize = Ty->getPrimitiveSizeInBits();
15569 return !(BitSize == 0 || BitSize > 64);
15570}
15571
15572bool PPCTargetLowering::isTruncateFree(Type *Ty1, Type *Ty2) const {
 15573 if (!Ty1->isIntegerTy() || !Ty2->isIntegerTy())
15574 return false;
15575 unsigned NumBits1 = Ty1->getPrimitiveSizeInBits();
15576 unsigned NumBits2 = Ty2->getPrimitiveSizeInBits();
15577 return NumBits1 == 64 && NumBits2 == 32;
15578}
15579
15580bool PPCTargetLowering::isTruncateFree(EVT VT1, EVT VT2) const {
 15581 if (!VT1.isInteger() || !VT2.isInteger())
15582 return false;
15583 unsigned NumBits1 = VT1.getSizeInBits();
15584 unsigned NumBits2 = VT2.getSizeInBits();
15585 return NumBits1 == 64 && NumBits2 == 32;
15586}
15587
15588bool PPCTargetLowering::isZExtFree(SDValue Val, EVT VT2) const {
 15589 // Generally speaking, zexts are not free, but they are free when they can be
15590 // folded with other operations.
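 // For example (illustrative note): an i8 or i16 zero-extending load is a
 // single lbz/lhz, which zero-fills the destination register anyway, so
 // the zext costs nothing extra; the checks below encode those shapes.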
15591 if (LoadSDNode *LD = dyn_cast<LoadSDNode>(Val)) {
15592 EVT MemVT = LD->getMemoryVT();
15593 if ((MemVT == MVT::i1 || MemVT == MVT::i8 || MemVT == MVT::i16 ||
15594 (Subtarget.isPPC64() && MemVT == MVT::i32)) &&
15595 (LD->getExtensionType() == ISD::NON_EXTLOAD ||
15596 LD->getExtensionType() == ISD::ZEXTLOAD))
15597 return true;
15598 }
15599
15600 // FIXME: Add other cases...
15601 // - 32-bit shifts with a zext to i64
15602 // - zext after ctlz, bswap, etc.
15603 // - zext after and by a constant mask
15604
15605 return TargetLowering::isZExtFree(Val, VT2);
15606}
15607
15608bool PPCTargetLowering::isFPExtFree(EVT DestVT, EVT SrcVT) const {
15609 assert(DestVT.isFloatingPoint() && SrcVT.isFloatingPoint() &&
15610 "invalid fpext types");
15611 // Extending to float128 is not free.
15612 if (DestVT == MVT::f128)
15613 return false;
15614 return true;
15615}
15616
15617bool PPCTargetLowering::isLegalICmpImmediate(int64_t Imm) const {
 15618 return isInt<16>(Imm) || isUInt<16>(Imm);
15619}
15620
15621bool PPCTargetLowering::isLegalAddImmediate(int64_t Imm) const {
 15622 return isInt<16>(Imm) || isUInt<16>(Imm);
15623}
15624
15625bool PPCTargetLowering::allowsMisalignedMemoryAccesses(EVT VT,
 15626 unsigned,
 15627 unsigned,
 15628 MachineMemOperand::Flags,
 15629 bool *Fast) const {
 15630 if (DisablePPCUnaligned)
 15631 return false;
15632
15633 // PowerPC supports unaligned memory access for simple non-vector types.
15634 // Although accessing unaligned addresses is not as efficient as accessing
15635 // aligned addresses, it is generally more efficient than manual expansion,
15636 // and generally only traps for software emulation when crossing page
15637 // boundaries.
15638
15639 if (!VT.isSimple())
15640 return false;
15641
15642 if (VT.isFloatingPoint() && !VT.isVector() &&
15643 !Subtarget.allowsUnalignedFPAccess())
15644 return false;
15645
15646 if (VT.getSimpleVT().isVector()) {
15647 if (Subtarget.hasVSX()) {
15648 if (VT != MVT::v2f64 && VT != MVT::v2i64 &&
15649 VT != MVT::v4f32 && VT != MVT::v4i32)
15650 return false;
15651 } else {
15652 return false;
15653 }
15654 }
15655
15656 if (VT == MVT::ppcf128)
15657 return false;
15658
15659 if (Fast)
15660 *Fast = true;
15661
15662 return true;
15663}
15664
15665bool PPCTargetLowering::decomposeMulByConstant(LLVMContext &Context, EVT VT,
 15666 SDValue C) const {
15667 // Check integral scalar types.
15668 if (!VT.isScalarInteger())
15669 return false;
15670 if (auto *ConstNode = dyn_cast<ConstantSDNode>(C.getNode())) {
15671 if (!ConstNode->getAPIntValue().isSignedIntN(64))
15672 return false;
15673 // This transformation will generate >= 2 operations. But the following
15674 // cases will generate <= 2 instructions during ISEL. So exclude them.
15675 // 1. If the constant multiplier fits 16 bits, it can be handled by one
15676 // HW instruction, ie. MULLI
 15677 // 2. If the multiplier after being shifted fits into 16 bits, one extra
 15678 // shift instruction is needed compared to case 1, ie. MULLI and RLDICR
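 // Illustrative examples (not from the source): 5 * x can become
 // (x << 2) + x since 5 - 1 is a power of two, and 7 * x can become
 // (x << 3) - x since 7 + 1 is a power of two; both take two instructions,
 // so such constants are worth decomposing.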
15679 int64_t Imm = ConstNode->getSExtValue();
15680 unsigned Shift = countTrailingZeros<uint64_t>(Imm);
15681 Imm >>= Shift;
15682 if (isInt<16>(Imm))
15683 return false;
15684 uint64_t UImm = static_cast<uint64_t>(Imm);
15685 if (isPowerOf2_64(UImm + 1) || isPowerOf2_64(UImm - 1) ||
15686 isPowerOf2_64(1 - UImm) || isPowerOf2_64(-1 - UImm))
15687 return true;
15688 }
15689 return false;
15690}
15691
15692bool PPCTargetLowering::isFMAFasterThanFMulAndFAdd(const MachineFunction &MF,
 15693 EVT VT) const {
 15694 return isFMAFasterThanFMulAndFAdd(
 15695 MF.getFunction(), VT.getTypeForEVT(MF.getFunction().getContext()));
15696}
15697
15698bool PPCTargetLowering::isFMAFasterThanFMulAndFAdd(const Function &Func,
15699 Type *Ty) const {
15700 switch (Ty->getScalarType()->getTypeID()) {
15701 case Type::FloatTyID:
15702 case Type::DoubleTyID:
15703 return true;
15704 case Type::FP128TyID:
15705 return Subtarget.hasP9Vector();
15706 default:
15707 return false;
15708 }
15709}
15710
15711// FIXME: add more patterns which are not profitable to hoist.
15712bool PPCTargetLowering::isProfitableToHoist(Instruction *I) const {
 15713 if (!I->hasOneUse())
15714 return true;
15715
 15716 Instruction *User = I->user_back();
 15717 assert(User && "A single use instruction with no uses.");
15718
15719 switch (I->getOpcode()) {
15720 case Instruction::FMul: {
15721 // Don't break FMA, PowerPC prefers FMA.
15722 if (User->getOpcode() != Instruction::FSub &&
15723 User->getOpcode() != Instruction::FAdd)
15724 return true;
15725
15726 const TargetOptions &Options = getTargetMachine().Options;
15727 const Function *F = I->getFunction();
15728 const DataLayout &DL = F->getParent()->getDataLayout();
15729 Type *Ty = User->getOperand(0)->getType();
15730
 15731 return !(
 15732 isFMAFasterThanFMulAndFAdd(*F, Ty) &&
 15733 isOperationLegalOrCustom(ISD::FMA, getValueType(DL, Ty)) &&
15734 (Options.AllowFPOpFusion == FPOpFusion::Fast || Options.UnsafeFPMath));
15735 }
15736 case Instruction::Load: {
15737 // Don't break "store (load float*)" pattern, this pattern will be combined
15738 // to "store (load int32)" in later InstCombine pass. See function
 15739 // combineLoadToOperationType. On PowerPC, loading a floating-point value
 15740 // takes more cycles than loading a 32-bit integer.
15741 LoadInst *LI = cast<LoadInst>(I);
 15742 // For the loads that combineLoadToOperationType leaves alone, like
 15743 // ordered loads, it should be profitable to hoist them.
15744 // For swifterror load, it can only be used for pointer to pointer type, so
15745 // later type check should get rid of this case.
15746 if (!LI->isUnordered())
15747 return true;
15748
15749 if (User->getOpcode() != Instruction::Store)
15750 return true;
15751
15752 if (I->getType()->getTypeID() != Type::FloatTyID)
15753 return true;
15754
15755 return false;
15756 }
15757 default:
15758 return true;
15759 }
15760 return true;
15761}
15762
15763const MCPhysReg *
15764PPCTargetLowering::getScratchRegisters(CallingConv::ID) const {
 15765 // LR is a callee-save register, but we must treat it as clobbered by any call
15766 // site. Hence we include LR in the scratch registers, which are in turn added
15767 // as implicit-defs for stackmaps and patchpoints. The same reasoning applies
15768 // to CTR, which is used by any indirect call.
15769 static const MCPhysReg ScratchRegs[] = {
15770 PPC::X12, PPC::LR8, PPC::CTR8, 0
15771 };
15772
15773 return ScratchRegs;
15774}
15775
15776Register PPCTargetLowering::getExceptionPointerRegister(
 15777 const Constant *PersonalityFn) const {
15778 return Subtarget.isPPC64() ? PPC::X3 : PPC::R3;
15779}
15780
15781Register PPCTargetLowering::getExceptionSelectorRegister(
 15782 const Constant *PersonalityFn) const {
15783 return Subtarget.isPPC64() ? PPC::X4 : PPC::R4;
15784}
15785
15786bool
15787PPCTargetLowering::shouldExpandBuildVectorWithShuffles(
 15788 EVT VT , unsigned DefinedValues) const {
15789 if (VT == MVT::v2i64)
15790 return Subtarget.hasDirectMove(); // Don't need stack ops with direct moves
15791
15792 if (Subtarget.hasVSX())
15793 return true;
15794
 15795 return TargetLowering::shouldExpandBuildVectorWithShuffles(VT, DefinedValues);
15796}
15797
15798Sched::Preference PPCTargetLowering::getSchedulingPreference(SDNode *N) const {
 15799 if (DisableILPPref || Subtarget.enableMachineScheduler())
 15800 return TargetLowering::getSchedulingPreference(N);
15801
 15802 return Sched::ILP;
15803}
15804
15805// Create a fast isel object.
15806FastISel *
15807PPCTargetLowering::createFastISel(FunctionLoweringInfo &FuncInfo,
 15808 const TargetLibraryInfo *LibInfo) const {
15809 return PPC::createFastISel(FuncInfo, LibInfo);
15810}
15811
15812// 'Inverted' means the FMA opcode after negating one multiplicand.
15813// For example, (fma -a b c) = (fnmsub a b c)
15814static unsigned invertFMAOpcode(unsigned Opc) {
15815 switch (Opc) {
15816 default:
15817 llvm_unreachable("Invalid FMA opcode for PowerPC!");
15818 case ISD::FMA:
15819 return PPCISD::FNMSUB;
15820 case PPCISD::FNMSUB:
15821 return ISD::FMA;
15822 }
15823}
15824
15825SDValue PPCTargetLowering::getNegatedExpression(SDValue Op, SelectionDAG &DAG,
 15826 bool LegalOps, bool OptForSize,
15827 NegatibleCost &Cost,
15828 unsigned Depth) const {
 15829 if (Depth > SelectionDAG::MaxRecursionDepth)
 15830 return SDValue();
15831
15832 unsigned Opc = Op.getOpcode();
15833 EVT VT = Op.getValueType();
15834 SDNodeFlags Flags = Op.getNode()->getFlags();
15835
15836 switch (Opc) {
15837 case PPCISD::FNMSUB:
15838 if (!Op.hasOneUse() || !isTypeLegal(VT))
15839 break;
15840
15841 const TargetOptions &Options = getTargetMachine().Options;
15842 SDValue N0 = Op.getOperand(0);
15843 SDValue N1 = Op.getOperand(1);
15844 SDValue N2 = Op.getOperand(2);
15845 SDLoc Loc(Op);
15846
 15847 NegatibleCost N2Cost = NegatibleCost::Expensive;
 15848 SDValue NegN2 =
15849 getNegatedExpression(N2, DAG, LegalOps, OptForSize, N2Cost, Depth + 1);
15850
15851 if (!NegN2)
15852 return SDValue();
15853
15854 // (fneg (fnmsub a b c)) => (fnmsub (fneg a) b (fneg c))
15855 // (fneg (fnmsub a b c)) => (fnmsub a (fneg b) (fneg c))
15856 // These transformations may change sign of zeroes. For example,
15857 // -(-ab-(-c))=-0 while -(-(ab-c))=+0 when a=b=c=1.
15858 if (Flags.hasNoSignedZeros() || Options.NoSignedZerosFPMath) {
15859 // Try and choose the cheaper one to negate.
 15860 NegatibleCost N0Cost = NegatibleCost::Expensive;
 15861 SDValue NegN0 = getNegatedExpression(N0, DAG, LegalOps, OptForSize,
15862 N0Cost, Depth + 1);
15863
 15864 NegatibleCost N1Cost = NegatibleCost::Expensive;
 15865 SDValue NegN1 = getNegatedExpression(N1, DAG, LegalOps, OptForSize,
15866 N1Cost, Depth + 1);
15867
15868 if (NegN0 && N0Cost <= N1Cost) {
15869 Cost = std::min(N0Cost, N2Cost);
15870 return DAG.getNode(Opc, Loc, VT, NegN0, N1, NegN2, Flags);
15871 } else if (NegN1) {
15872 Cost = std::min(N1Cost, N2Cost);
15873 return DAG.getNode(Opc, Loc, VT, N0, NegN1, NegN2, Flags);
15874 }
15875 }
15876
15877 // (fneg (fnmsub a b c)) => (fma a b (fneg c))
15878 if (isOperationLegal(ISD::FMA, VT)) {
15879 Cost = N2Cost;
15880 return DAG.getNode(ISD::FMA, Loc, VT, N0, N1, NegN2, Flags);
15881 }
15882
15883 break;
15884 }
15885
15886 return TargetLowering::getNegatedExpression(Op, DAG, LegalOps, OptForSize,
15887 Cost, Depth);
15888}
15889
15890// Override to enable LOAD_STACK_GUARD lowering on Linux.
15891bool PPCTargetLowering::useLoadStackGuardNode() const {
 15892 if (!Subtarget.isTargetLinux())
 15893 return TargetLowering::useLoadStackGuardNode();
15894 return true;
15895}
15896
15897// Override to disable global variable loading on Linux.
15898void PPCTargetLowering::insertSSPDeclarations(Module &M) const {
 15899 if (!Subtarget.isTargetLinux())
 15900 return TargetLowering::insertSSPDeclarations(M);
15901}
15902
15903bool PPCTargetLowering::isFPImmLegal(const APFloat &Imm, EVT VT,
 15904 bool ForCodeSize) const {
15905 if (!VT.isSimple() || !Subtarget.hasVSX())
15906 return false;
15907
15908 switch(VT.getSimpleVT().SimpleTy) {
15909 default:
15910 // For FP types that are currently not supported by PPC backend, return
15911 // false. Examples: f16, f80.
15912 return false;
15913 case MVT::f32:
15914 case MVT::f64:
15915 if (Subtarget.hasPrefixInstrs()) {
15916 // With prefixed instructions, we can materialize anything that can be
15917 // represented with a 32-bit immediate, not just positive zero.
15918 APFloat APFloatOfImm = Imm;
15919 return convertToNonDenormSingle(APFloatOfImm);
15920 }
 15921 LLVM_FALLTHROUGH;
 15922 case MVT::ppcf128:
15923 return Imm.isPosZero();
15924 }
15925}
15926
15927// For vector shift operation op, fold
15928// (op x, (and y, ((1 << numbits(x)) - 1))) -> (target op x, y)
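 // For example (hedged sketch): with v4i32 elements, the vslw instruction
 // already uses each shift amount modulo 32, so
 // (shl x, (and y, 31)) can drop the mask and become PPCISD::SHL x, y.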
15929static SDValue stripModuloOnShift(const TargetLowering &TLI, SDNode *N,
 15930 SelectionDAG &DAG) {
15931 SDValue N0 = N->getOperand(0);
15932 SDValue N1 = N->getOperand(1);
15933 EVT VT = N0.getValueType();
15934 unsigned OpSizeInBits = VT.getScalarSizeInBits();
15935 unsigned Opcode = N->getOpcode();
15936 unsigned TargetOpcode;
15937
15938 switch (Opcode) {
15939 default:
15940 llvm_unreachable("Unexpected shift operation");
15941 case ISD::SHL:
15942 TargetOpcode = PPCISD::SHL;
15943 break;
15944 case ISD::SRL:
15945 TargetOpcode = PPCISD::SRL;
15946 break;
15947 case ISD::SRA:
15948 TargetOpcode = PPCISD::SRA;
15949 break;
15950 }
15951
15952 if (VT.isVector() && TLI.isOperationLegal(Opcode, VT) &&
15953 N1->getOpcode() == ISD::AND)
15954 if (ConstantSDNode *Mask = isConstOrConstSplat(N1->getOperand(1)))
15955 if (Mask->getZExtValue() == OpSizeInBits - 1)
15956 return DAG.getNode(TargetOpcode, SDLoc(N), VT, N0, N1->getOperand(0));
15957
15958 return SDValue();
15959}
15960
15961SDValue PPCTargetLowering::combineSHL(SDNode *N, DAGCombinerInfo &DCI) const {
15962 if (auto Value = stripModuloOnShift(*this, N, DCI.DAG))
15963 return Value;
15964
15965 SDValue N0 = N->getOperand(0);
15966 ConstantSDNode *CN1 = dyn_cast<ConstantSDNode>(N->getOperand(1));
15967 if (!Subtarget.isISA3_0() || !Subtarget.isPPC64() ||
15968 N0.getOpcode() != ISD::SIGN_EXTEND ||
15969 N0.getOperand(0).getValueType() != MVT::i32 || CN1 == nullptr ||
15970 N->getValueType(0) != MVT::i64)
15971 return SDValue();
15972
15973 // We can't save an operation here if the value is already extended, and
15974 // the existing shift is easier to combine.
15975 SDValue ExtsSrc = N0.getOperand(0);
15976 if (ExtsSrc.getOpcode() == ISD::TRUNCATE &&
15977 ExtsSrc.getOperand(0).getOpcode() == ISD::AssertSext)
15978 return SDValue();
15979
15980 SDLoc DL(N0);
15981 SDValue ShiftBy = SDValue(CN1, 0);
15982 // We want the shift amount to be i32 on the extswli, but the shift could
15983 // have an i64.
15984 if (ShiftBy.getValueType() == MVT::i64)
15985 ShiftBy = DCI.DAG.getConstant(CN1->getZExtValue(), DL, MVT::i32);
15986
15987 return DCI.DAG.getNode(PPCISD::EXTSWSLI, DL, MVT::i64, N0->getOperand(0),
15988 ShiftBy);
15989}
15990
15991SDValue PPCTargetLowering::combineSRA(SDNode *N, DAGCombinerInfo &DCI) const {
15992 if (auto Value = stripModuloOnShift(*this, N, DCI.DAG))
15993 return Value;
15994
15995 return SDValue();
15996}
15997
15998SDValue PPCTargetLowering::combineSRL(SDNode *N, DAGCombinerInfo &DCI) const {
15999 if (auto Value = stripModuloOnShift(*this, N, DCI.DAG))
16000 return Value;
16001
16002 return SDValue();
16003}
16004
16005// Transform (add X, (zext(setne Z, C))) -> (addze X, (addic (addi Z, -C), -1))
16006// Transform (add X, (zext(sete Z, C))) -> (addze X, (subfic (addi Z, -C), 0))
16007// When C is zero, the equation (addi Z, -C) can be simplified to Z
16008// Requirement: -C in [-32768, 32767], X and Z are MVT::i64 types
16009static SDValue combineADDToADDZE(SDNode *N, SelectionDAG &DAG,
 16010 const PPCSubtarget &Subtarget) {
16011 if (!Subtarget.isPPC64())
16012 return SDValue();
16013
16014 SDValue LHS = N->getOperand(0);
16015 SDValue RHS = N->getOperand(1);
16016
16017 auto isZextOfCompareWithConstant = [](SDValue Op) {
16018 if (Op.getOpcode() != ISD::ZERO_EXTEND || !Op.hasOneUse() ||
16019 Op.getValueType() != MVT::i64)
16020 return false;
16021
16022 SDValue Cmp = Op.getOperand(0);
16023 if (Cmp.getOpcode() != ISD::SETCC || !Cmp.hasOneUse() ||
16024 Cmp.getOperand(0).getValueType() != MVT::i64)
16025 return false;
16026
16027 if (auto *Constant = dyn_cast<ConstantSDNode>(Cmp.getOperand(1))) {
16028 int64_t NegConstant = 0 - Constant->getSExtValue();
16029 // Due to the limitations of the addi instruction,
16030 // -C is required to be [-32768, 32767].
16031 return isInt<16>(NegConstant);
16032 }
16033
16034 return false;
16035 };
16036
16037 bool LHSHasPattern = isZextOfCompareWithConstant(LHS);
16038 bool RHSHasPattern = isZextOfCompareWithConstant(RHS);
16039
16040 // If there is a pattern, canonicalize a zext operand to the RHS.
16041 if (LHSHasPattern && !RHSHasPattern)
16042 std::swap(LHS, RHS);
16043 else if (!LHSHasPattern && !RHSHasPattern)
16044 return SDValue();
16045
16046 SDLoc DL(N);
 16047 SDVTList VTs = DAG.getVTList(MVT::i64, MVT::Glue);
 16048 SDValue Cmp = RHS.getOperand(0);
16049 SDValue Z = Cmp.getOperand(0);
16050 auto *Constant = dyn_cast<ConstantSDNode>(Cmp.getOperand(1));
16051
16052 assert(Constant && "Constant Should not be a null pointer.");
16053 int64_t NegConstant = 0 - Constant->getSExtValue();
16054
16055 switch(cast<CondCodeSDNode>(Cmp.getOperand(2))->get()) {
16056 default: break;
16057 case ISD::SETNE: {
16058 // when C == 0
16059 // --> addze X, (addic Z, -1).carry
16060 // /
16061 // add X, (zext(setne Z, C))--
16062 // \ when -32768 <= -C <= 32767 && C != 0
16063 // --> addze X, (addic (addi Z, -C), -1).carry
 16064 SDValue Add = DAG.getNode(ISD::ADD, DL, MVT::i64, Z,
 16065 DAG.getConstant(NegConstant, DL, MVT::i64));
16066 SDValue AddOrZ = NegConstant != 0 ? Add : Z;
 16067 SDValue Addc = DAG.getNode(ISD::ADDC, DL, DAG.getVTList(MVT::i64, MVT::Glue),
 16068 AddOrZ, DAG.getConstant(-1ULL, DL, MVT::i64));
16069 return DAG.getNode(ISD::ADDE, DL, VTs, LHS, DAG.getConstant(0, DL, MVT::i64),
16070 SDValue(Addc.getNode(), 1));
16071 }
16072 case ISD::SETEQ: {
16073 // when C == 0
16074 // --> addze X, (subfic Z, 0).carry
16075 // /
16076 // add X, (zext(sete Z, C))--
16077 // \ when -32768 <= -C <= 32767 && C != 0
16078 // --> addze X, (subfic (addi Z, -C), 0).carry
 16079 SDValue Add = DAG.getNode(ISD::ADD, DL, MVT::i64, Z,
 16080 DAG.getConstant(NegConstant, DL, MVT::i64));
16081 SDValue AddOrZ = NegConstant != 0 ? Add : Z;
16082    SDValue Subc = DAG.getNode(ISD::SUBC, DL, DAG.getVTList(MVT::i64, MVT::Glue),
16083                               DAG.getConstant(0, DL, MVT::i64), AddOrZ);
16084 return DAG.getNode(ISD::ADDE, DL, VTs, LHS, DAG.getConstant(0, DL, MVT::i64),
16085 SDValue(Subc.getNode(), 1));
16086 }
16087 }
16088
16089 return SDValue();
16090}
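As an illustration of the pattern this combine targets, here is a minimal C-level sketch; the function and register choices are schematic, not taken from the file:

    // Sketch: add of a zero-extended compare on 64-bit PPC.
    long add_ne(long X, long Z) {
      return X + (Z != 0);   // (add X, (zext (setne Z, 0)))
    }
    // With the combine above, no boolean value is materialized; the
    // carry from "addic Z, -1" (set exactly when Z != 0) feeds addze:
    //   addic r4, r4, -1    // CA = (Z != 0)
    //   addze r3, r3        // X += CA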
16091
16092// Transform
16093// (add C1, (MAT_PCREL_ADDR GlobalAddr+C2)) to
16094// (MAT_PCREL_ADDR GlobalAddr+(C1+C2))
16095// In this case both C1 and C2 must be known constants.
16096// C1+C2 must fit into a 34 bit signed integer.
16097static SDValue combineADDToMAT_PCREL_ADDR(SDNode *N, SelectionDAG &DAG,
16098                                          const PPCSubtarget &Subtarget) {
16099 if (!Subtarget.isUsingPCRelativeCalls())
16100 return SDValue();
16101
16102  // Check both Operand 0 and Operand 1 of the ADD node for the PCRel node.
16103  // If we find that node, try to cast the Global Address and the Constant.
16104 SDValue LHS = N->getOperand(0);
16105 SDValue RHS = N->getOperand(1);
16106
16107 if (LHS.getOpcode() != PPCISD::MAT_PCREL_ADDR)
16108 std::swap(LHS, RHS);
16109
16110 if (LHS.getOpcode() != PPCISD::MAT_PCREL_ADDR)
16111 return SDValue();
16112
16113 // Operand zero of PPCISD::MAT_PCREL_ADDR is the GA node.
16114 GlobalAddressSDNode *GSDN = dyn_cast<GlobalAddressSDNode>(LHS.getOperand(0));
16115 ConstantSDNode* ConstNode = dyn_cast<ConstantSDNode>(RHS);
16116
16117 // Check that both casts succeeded.
16118 if (!GSDN || !ConstNode)
16119 return SDValue();
16120
16121 int64_t NewOffset = GSDN->getOffset() + ConstNode->getSExtValue();
16122 SDLoc DL(GSDN);
16123
16124 // The signed int offset needs to fit in 34 bits.
16125 if (!isInt<34>(NewOffset))
16126 return SDValue();
16127
16128 // The new global address is a copy of the old global address except
16129 // that it has the updated Offset.
16130 SDValue GA =
16131 DAG.getTargetGlobalAddress(GSDN->getGlobal(), DL, GSDN->getValueType(0),
16132 NewOffset, GSDN->getTargetFlags());
16133 SDValue MatPCRel =
16134 DAG.getNode(PPCISD::MAT_PCREL_ADDR, DL, GSDN->getValueType(0), GA);
16135 return MatPCRel;
16136}
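A hedged sketch of what this enables at the source level; the exact assembly depends on the code model and linker, and the symbol name is hypothetical:

    // Sketch (assumes a Power10 target using PC-relative addressing):
    extern long Table[];                  // GlobalAddr
    long *second() { return &Table[1]; }  // (add (MAT_PCREL_ADDR Table), 8)
    // After the combine the +8 is folded into the materialization,
    // e.g. a single "paddi rN, 0, Table@PCREL+8, 1" rather than a
    // paddi followed by a separate add (the offset must fit in 34 bits).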
16137
16138SDValue PPCTargetLowering::combineADD(SDNode *N, DAGCombinerInfo &DCI) const {
16139 if (auto Value = combineADDToADDZE(N, DCI.DAG, Subtarget))
16140 return Value;
16141
16142 if (auto Value = combineADDToMAT_PCREL_ADDR(N, DCI.DAG, Subtarget))
16143 return Value;
16144
16145 return SDValue();
16146}
16147
16148// Detect TRUNCATE operations on bitcasts of float128 values.
16149// What we are looking for here is the situation where we extract a subset
16150// of bits from a 128 bit float.
16151// This can be of two forms:
16152// 1) BITCAST of f128 feeding TRUNCATE
16153// 2) BITCAST of f128 feeding SRL (a shift) feeding TRUNCATE
16154// This is required because we do not have a legal i128 type, so we
16155// want to avoid having to store the f128 and then reload part
16156// of it.
16157SDValue PPCTargetLowering::combineTRUNCATE(SDNode *N,
16158 DAGCombinerInfo &DCI) const {
16159 // If we are using CRBits then try that first.
16160 if (Subtarget.useCRBits()) {
16161 // Check if CRBits did anything and return that if it did.
16162 if (SDValue CRTruncValue = DAGCombineTruncBoolExt(N, DCI))
16163 return CRTruncValue;
16164 }
16165
16166 SDLoc dl(N);
16167 SDValue Op0 = N->getOperand(0);
16168
16169 // fold (truncate (abs (sub (zext a), (zext b)))) -> (vabsd a, b)
16170 if (Subtarget.hasP9Altivec() && Op0.getOpcode() == ISD::ABS) {
16171 EVT VT = N->getValueType(0);
16172 if (VT != MVT::v4i32 && VT != MVT::v8i16 && VT != MVT::v16i8)
16173 return SDValue();
16174 SDValue Sub = Op0.getOperand(0);
16175 if (Sub.getOpcode() == ISD::SUB) {
16176 SDValue SubOp0 = Sub.getOperand(0);
16177 SDValue SubOp1 = Sub.getOperand(1);
16178 if ((SubOp0.getOpcode() == ISD::ZERO_EXTEND) &&
16179 (SubOp1.getOpcode() == ISD::ZERO_EXTEND)) {
16180 return DCI.DAG.getNode(PPCISD::VABSD, dl, VT, SubOp0.getOperand(0),
16181 SubOp1.getOperand(0),
16182 DCI.DAG.getTargetConstant(0, dl, MVT::i32));
16183 }
16184 }
16185 }
16186
16187 // Looking for a truncate of i128 to i64.
16188 if (Op0.getValueType() != MVT::i128 || N->getValueType(0) != MVT::i64)
16189 return SDValue();
16190
16191 int EltToExtract = DCI.DAG.getDataLayout().isBigEndian() ? 1 : 0;
16192
16193 // SRL feeding TRUNCATE.
16194 if (Op0.getOpcode() == ISD::SRL) {
16195    ConstantSDNode *ConstNode = dyn_cast<ConstantSDNode>(Op0.getOperand(1));
16196    // The right shift has to be by 64 bits.
16197 if (!ConstNode || ConstNode->getZExtValue() != 64)
16198 return SDValue();
16199
16200 // Switch the element number to extract.
16201 EltToExtract = EltToExtract ? 0 : 1;
16202 // Update Op0 past the SRL.
16203 Op0 = Op0.getOperand(0);
16204 }
16205
16206 // BITCAST feeding a TRUNCATE possibly via SRL.
16207 if (Op0.getOpcode() == ISD::BITCAST &&
16208 Op0.getValueType() == MVT::i128 &&
16209 Op0.getOperand(0).getValueType() == MVT::f128) {
16210 SDValue Bitcast = DCI.DAG.getBitcast(MVT::v2i64, Op0.getOperand(0));
16211 return DCI.DAG.getNode(
16212 ISD::EXTRACT_VECTOR_ELT, dl, MVT::i64, Bitcast,
16213 DCI.DAG.getTargetConstant(EltToExtract, dl, MVT::i32));
16214 }
16215 return SDValue();
16216}
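For reference, a minimal sketch of source code that produces the BITCAST/SRL/TRUNCATE shape described above (assumes compiler support for __float128 and unsigned __int128; not part of the file):

    #include <cstdint>
    #include <cstring>

    // Extract the high 64 bits of a binary128 value.
    uint64_t f128_high(__float128 F) {
      unsigned __int128 Bits;
      std::memcpy(&Bits, &F, sizeof(Bits));   // bitcast f128 -> i128
      return (uint64_t)(Bits >> 64);          // srl by 64, truncate to i64
    }
    // The combine rewrites this as (extract_vector_elt (bitcast v2i64) N),
    // so the f128 never has to be stored and partially reloaded.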
16217
16218SDValue PPCTargetLowering::combineMUL(SDNode *N, DAGCombinerInfo &DCI) const {
16219 SelectionDAG &DAG = DCI.DAG;
16220
16221 ConstantSDNode *ConstOpOrElement = isConstOrConstSplat(N->getOperand(1));
16222 if (!ConstOpOrElement)
16223 return SDValue();
16224
16225  // An imul is usually smaller than the alternative sequence for a legal type.
16226  if (DAG.getMachineFunction().getFunction().hasMinSize() &&
16227      isOperationLegal(ISD::MUL, N->getValueType(0)))
16228 return SDValue();
16229
16230 auto IsProfitable = [this](bool IsNeg, bool IsAddOne, EVT VT) -> bool {
16231 switch (this->Subtarget.getCPUDirective()) {
16232 default:
16233 // TODO: enhance the condition for subtarget before pwr8
16234 return false;
16235 case PPC::DIR_PWR8:
16236 // type mul add shl
16237 // scalar 4 1 1
16238 // vector 7 2 2
16239 return true;
16240 case PPC::DIR_PWR9:
16241 case PPC::DIR_PWR10:
16242    case PPC::DIR_PWR_FUTURE:
16243      //  type        mul     add    shl
16244 // scalar 5 2 2
16245 // vector 7 2 2
16246
16247      // The cycle ratios of the relevant operations are shown in the table
16248      // above. Because mul costs 5 (scalar) / 7 (vector) while add/sub/shl
16249      // each cost 2 for both scalar and vector types, the 2-instruction
16250      // patterns (add/sub + shl) cost 4 and are always profitable; the
16251      // 3-instruction pattern (mul x, -(2^N + 1)) => -(add (shl x, N), x)
16252      // costs 6 (sub + add + shl), so we only do it for vector types.
16253 return IsAddOne && IsNeg ? VT.isVector() : true;
16254 }
16255 };
16256
16257 EVT VT = N->getValueType(0);
16258 SDLoc DL(N);
16259
16260 const APInt &MulAmt = ConstOpOrElement->getAPIntValue();
16261 bool IsNeg = MulAmt.isNegative();
16262 APInt MulAmtAbs = MulAmt.abs();
16263
16264 if ((MulAmtAbs - 1).isPowerOf2()) {
16265 // (mul x, 2^N + 1) => (add (shl x, N), x)
16266 // (mul x, -(2^N + 1)) => -(add (shl x, N), x)
16267
16268 if (!IsProfitable(IsNeg, true, VT))
16269 return SDValue();
16270
16271 SDValue Op0 = N->getOperand(0);
16272 SDValue Op1 =
16273 DAG.getNode(ISD::SHL, DL, VT, N->getOperand(0),
16274 DAG.getConstant((MulAmtAbs - 1).logBase2(), DL, VT));
16275 SDValue Res = DAG.getNode(ISD::ADD, DL, VT, Op0, Op1);
16276
16277 if (!IsNeg)
16278 return Res;
16279
16280 return DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, DL, VT), Res);
16281 } else if ((MulAmtAbs + 1).isPowerOf2()) {
16282 // (mul x, 2^N - 1) => (sub (shl x, N), x)
16283 // (mul x, -(2^N - 1)) => (sub x, (shl x, N))
16284
16285 if (!IsProfitable(IsNeg, false, VT))
16286 return SDValue();
16287
16288 SDValue Op0 = N->getOperand(0);
16289 SDValue Op1 =
16290 DAG.getNode(ISD::SHL, DL, VT, N->getOperand(0),
16291 DAG.getConstant((MulAmtAbs + 1).logBase2(), DL, VT));
16292
16293 if (!IsNeg)
16294 return DAG.getNode(ISD::SUB, DL, VT, Op1, Op0);
16295 else
16296 return DAG.getNode(ISD::SUB, DL, VT, Op0, Op1);
16297
16298 } else {
16299 return SDValue();
16300 }
16301}
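The identities behind the two cases, with tiny examples (a sketch; the actual expansion is still subject to the profitability check above):

    // (mul x, 2^N + 1)    => (add (shl x, N), x)
    // (mul x, -(2^N + 1)) => (sub 0, (add (shl x, N), x))
    // (mul x, 2^N - 1)    => (sub (shl x, N), x)
    // (mul x, -(2^N - 1)) => (sub x, (shl x, N))
    long mul9(long x)  { return x * 9; }   // (x << 3) + x
    long mul7(long x)  { return x * 7; }   // (x << 3) - x
    long muln7(long x) { return x * -7; }  // x - (x << 3)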
16302
16303// Combine fma-like op (like fnmsub) with fnegs to appropriate op. Do this
16304// in combiner since we need to check SD flags and other subtarget features.
16305SDValue PPCTargetLowering::combineFMALike(SDNode *N,
16306 DAGCombinerInfo &DCI) const {
16307 SDValue N0 = N->getOperand(0);
16308 SDValue N1 = N->getOperand(1);
16309 SDValue N2 = N->getOperand(2);
16310 SDNodeFlags Flags = N->getFlags();
16311 EVT VT = N->getValueType(0);
16312 SelectionDAG &DAG = DCI.DAG;
16313 const TargetOptions &Options = getTargetMachine().Options;
16314 unsigned Opc = N->getOpcode();
16315 bool CodeSize = DAG.getMachineFunction().getFunction().hasOptSize();
16316 bool LegalOps = !DCI.isBeforeLegalizeOps();
16317 SDLoc Loc(N);
16318
16319 if (!isOperationLegal(ISD::FMA, VT))
16320 return SDValue();
16321
16322 // Allowing transformation to FNMSUB may change sign of zeroes when ab-c=0
16323 // since (fnmsub a b c)=-0 while c-ab=+0.
16324 if (!Flags.hasNoSignedZeros() && !Options.NoSignedZerosFPMath)
16325 return SDValue();
16326
16327 // (fma (fneg a) b c) => (fnmsub a b c)
16328 // (fnmsub (fneg a) b c) => (fma a b c)
16329 if (SDValue NegN0 = getCheaperNegatedExpression(N0, DAG, LegalOps, CodeSize))
16330 return DAG.getNode(invertFMAOpcode(Opc), Loc, VT, NegN0, N1, N2, Flags);
16331
16332 // (fma a (fneg b) c) => (fnmsub a b c)
16333 // (fnmsub a (fneg b) c) => (fma a b c)
16334 if (SDValue NegN1 = getCheaperNegatedExpression(N1, DAG, LegalOps, CodeSize))
16335 return DAG.getNode(invertFMAOpcode(Opc), Loc, VT, N0, NegN1, N2, Flags);
16336
16337 return SDValue();
16338}
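The sign reasoning in play, summarized as a sketch (fnmsub here means the PPC semantics, -(a*b - c)):

    // fma(a, b, c)    =  a*b + c
    // fnmsub(a, b, c) = -(a*b - c) = c - a*b, except for the sign of zero:
    //   when a*b == c, fnmsub yields -0.0 while c - a*b yields +0.0.
    // Hence:
    //   fma(-a, b, c)    = c - a*b = fnmsub(a, b, c)   (needs no-signed-zeros)
    //   fnmsub(-a, b, c) = a*b + c = fma(a, b, c)      (needs no-signed-zeros)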
16339
16340bool PPCTargetLowering::mayBeEmittedAsTailCall(const CallInst *CI) const {
16341  // Only duplicate to increase tail-calls for the 64-bit SysV ABIs.
16342 if (!Subtarget.is64BitELFABI())
16343 return false;
16344
16345 // If not a tail call then no need to proceed.
16346 if (!CI->isTailCall())
16347 return false;
16348
16349  // If sibling calls have been disabled and tail-calls aren't guaranteed,
16350  // there is no reason to duplicate.
16351 auto &TM = getTargetMachine();
16352 if (!TM.Options.GuaranteedTailCallOpt && DisableSCO)
16353 return false;
16354
16355 // Can't tail call a function called indirectly, or if it has variadic args.
16356 const Function *Callee = CI->getCalledFunction();
16357 if (!Callee || Callee->isVarArg())
16358 return false;
16359
16360 // Make sure the callee and caller calling conventions are eligible for tco.
16361 const Function *Caller = CI->getParent()->getParent();
16362 if (!areCallingConvEligibleForTCO_64SVR4(Caller->getCallingConv(),
16363 CI->getCallingConv()))
16364 return false;
16365
16366  // If the function is local, then we have a good chance at tail-calling it.
16367 return getTargetMachine().shouldAssumeDSOLocal(*Caller->getParent(), Callee);
16368}
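A minimal example of a call that satisfies these checks (hypothetical functions: same calling convention, non-variadic, and DSO-local):

    static long callee(long v) { return v * 2; }   // local, non-variadic
    long caller(long x) { return callee(x + 1); }  // call in tail position
    // Under the 64-bit ELF ABI this is a sibling-call candidate: the
    // caller's frame is torn down and callee is reached with a plain "b".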
16369
16370bool PPCTargetLowering::hasBitPreservingFPLogic(EVT VT) const {
16371 if (!Subtarget.hasVSX())
16372 return false;
16373 if (Subtarget.hasP9Vector() && VT == MVT::f128)
16374 return true;
16375 return VT == MVT::f32 || VT == MVT::f64 ||
16376 VT == MVT::v4f32 || VT == MVT::v2f64;
16377}
16378
16379bool PPCTargetLowering::
16380isMaskAndCmp0FoldingBeneficial(const Instruction &AndI) const {
16381 const Value *Mask = AndI.getOperand(1);
16382  // If the mask is suitable for andi. or andis., we should sink the and.
16383 if (const ConstantInt *CI = dyn_cast<ConstantInt>(Mask)) {
16384 // Can't handle constants wider than 64-bits.
16385 if (CI->getBitWidth() > 64)
16386 return false;
16387 int64_t ConstVal = CI->getZExtValue();
16388 return isUInt<16>(ConstVal) ||
16389 (isUInt<16>(ConstVal >> 16) && !(ConstVal & 0xFFFF));
16390 }
16391
16392 // For non-constant masks, we can always use the record-form and.
16393 return true;
16394}
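A standalone restatement of the constant-mask test above (a hypothetical helper, not part of the file):

    #include <cstdint>

    // True if C is encodable by a single record-form andi./andis.
    static bool fitsAndiOrAndis(uint64_t C) {
      bool Lo = C <= 0xFFFFu;                                // andi. rA, rS, C
      bool Hi = (C & 0xFFFFu) == 0 && (C >> 16) <= 0xFFFFu;  // andis. rA, rS, C>>16
      return Lo || Hi;
    }
    // fitsAndiOrAndis(0x00FF) and fitsAndiOrAndis(0xABCD0000) are true;
    // 0x00010001 is not, so such an 'and' would not be sunk.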
16395
16396// Transform (abs (sub (zext a), (zext b))) to (vabsd a b 0)
16397// Transform (abs (sub (zext a), (zext_invec b))) to (vabsd a b 0)
16398// Transform (abs (sub (zext_invec a), (zext_invec b))) to (vabsd a b 0)
16399// Transform (abs (sub (zext_invec a), (zext b))) to (vabsd a b 0)
16400// Transform (abs (sub a, b)) to (vabsd a b 1) if a and b are of type v4i32
16401SDValue PPCTargetLowering::combineABS(SDNode *N, DAGCombinerInfo &DCI) const {
16402 assert((N->getOpcode() == ISD::ABS) && "Need ABS node here");
16403  assert(Subtarget.hasP9Altivec() &&
16404         "Only combine this when P9 Altivec is supported!");
16405 EVT VT = N->getValueType(0);
16406 if (VT != MVT::v4i32 && VT != MVT::v8i16 && VT != MVT::v16i8)
16407 return SDValue();
16408
16409 SelectionDAG &DAG = DCI.DAG;
16410 SDLoc dl(N);
16411 if (N->getOperand(0).getOpcode() == ISD::SUB) {
16412    // Even for signed integers, the difference is known to be non-negative
16413    // (as a signed integer) when both inputs are zero-extended.
16414 unsigned SubOpcd0 = N->getOperand(0)->getOperand(0).getOpcode();
16415 unsigned SubOpcd1 = N->getOperand(0)->getOperand(1).getOpcode();
16416 if ((SubOpcd0 == ISD::ZERO_EXTEND ||
16417 SubOpcd0 == ISD::ZERO_EXTEND_VECTOR_INREG) &&
16418 (SubOpcd1 == ISD::ZERO_EXTEND ||
16419 SubOpcd1 == ISD::ZERO_EXTEND_VECTOR_INREG)) {
16420 return DAG.getNode(PPCISD::VABSD, dl, N->getOperand(0).getValueType(),
16421 N->getOperand(0)->getOperand(0),
16422 N->getOperand(0)->getOperand(1),
16423 DAG.getTargetConstant(0, dl, MVT::i32));
16424 }
16425
16426 // For type v4i32, it can be optimized with xvnegsp + vabsduw
16427 if (N->getOperand(0).getValueType() == MVT::v4i32 &&
16428 N->getOperand(0).hasOneUse()) {
16429 return DAG.getNode(PPCISD::VABSD, dl, N->getOperand(0).getValueType(),
16430 N->getOperand(0)->getOperand(0),
16431 N->getOperand(0)->getOperand(1),
16432 DAG.getTargetConstant(1, dl, MVT::i32));
16433 }
16434 }
16435
16436 return SDValue();
16437}
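A scalar model of what PPCISD::VABSD computes per lane may help (a sketch only; the real node operates on v4i32/v8i16/v16i8 vectors):

    #include <cstdint>

    // Per-lane model of vabsdu[bhw]: unsigned absolute difference.
    static uint32_t absd(uint32_t a, uint32_t b) {
      return a > b ? a - b : b - a;
    }
    // abs(sub(zext a, zext b)) equals absd(a, b) lane-wise, which is why
    // the widen/sub/abs chain above collapses to a single instruction.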
16438
16439// For type v4i32/v8i16/v16i8, transform
16440// from (vselect (setcc a, b, setugt), (sub a, b), (sub b, a)) to (vabsd a, b)
16441// from (vselect (setcc a, b, setuge), (sub a, b), (sub b, a)) to (vabsd a, b)
16442// from (vselect (setcc a, b, setult), (sub b, a), (sub a, b)) to (vabsd a, b)
16443// from (vselect (setcc a, b, setule), (sub b, a), (sub a, b)) to (vabsd a, b)
16444SDValue PPCTargetLowering::combineVSelect(SDNode *N,
16445 DAGCombinerInfo &DCI) const {
16446 assert((N->getOpcode() == ISD::VSELECT) && "Need VSELECT node here");
16447  assert(Subtarget.hasP9Altivec() &&
16448         "Only combine this when P9 Altivec is supported!");
16449
16450 SelectionDAG &DAG = DCI.DAG;
16451 SDLoc dl(N);
16452 SDValue Cond = N->getOperand(0);
16453 SDValue TrueOpnd = N->getOperand(1);
16454 SDValue FalseOpnd = N->getOperand(2);
16455 EVT VT = N->getOperand(1).getValueType();
16456
16457 if (Cond.getOpcode() != ISD::SETCC || TrueOpnd.getOpcode() != ISD::SUB ||
16458 FalseOpnd.getOpcode() != ISD::SUB)
16459 return SDValue();
16460
16461  // ABSD is only available for types v4i32/v8i16/v16i8
16462 if (VT != MVT::v4i32 && VT != MVT::v8i16 && VT != MVT::v16i8)
16463 return SDValue();
16464
16465  // Require at least one single-use operand so the combine saves a
16466  // dependent computation.
16466 if (!(Cond.hasOneUse() || TrueOpnd.hasOneUse() || FalseOpnd.hasOneUse()))
16467 return SDValue();
16468
16469 ISD::CondCode CC = cast<CondCodeSDNode>(Cond.getOperand(2))->get();
16470
16471 // Can only handle unsigned comparison here
16472 switch (CC) {
16473 default:
16474 return SDValue();
16475 case ISD::SETUGT:
16476 case ISD::SETUGE:
16477 break;
16478 case ISD::SETULT:
16479 case ISD::SETULE:
16480 std::swap(TrueOpnd, FalseOpnd);
16481 break;
16482 }
16483
16484 SDValue CmpOpnd1 = Cond.getOperand(0);
16485 SDValue CmpOpnd2 = Cond.getOperand(1);
16486
16487 // SETCC CmpOpnd1 CmpOpnd2 cond
16488 // TrueOpnd = CmpOpnd1 - CmpOpnd2
16489 // FalseOpnd = CmpOpnd2 - CmpOpnd1
16490 if (TrueOpnd.getOperand(0) == CmpOpnd1 &&
16491 TrueOpnd.getOperand(1) == CmpOpnd2 &&
16492 FalseOpnd.getOperand(0) == CmpOpnd2 &&
16493 FalseOpnd.getOperand(1) == CmpOpnd1) {
16494 return DAG.getNode(PPCISD::VABSD, dl, N->getOperand(1).getValueType(),
16495 CmpOpnd1, CmpOpnd2,
16496 DAG.getTargetConstant(0, dl, MVT::i32));
16497 }
16498
16499 return SDValue();
16500}
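For reference, a loop that vectorizes into exactly the matched shape (a sketch; it relies on the vectorizer producing the vselect/setcc/sub triple):

    #include <cstdint>

    void absdiff(uint32_t *R, const uint32_t *A, const uint32_t *B, int N) {
      for (int i = 0; i < N; ++i)
        R[i] = A[i] > B[i] ? A[i] - B[i] : B[i] - A[i];
      // Vectorized: (vselect (setcc a, b, setugt), (sub a, b), (sub b, a)),
      // which the combine above turns into a single vabsduw.
    }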
unsigned const MachineRegisterInfo * MRI
if(Register::isVirtualRegister(Reg)) return MRI -> getRegClass(Reg) ->hasSuperClassEq(&AArch64::GPR64RegClass)
static unsigned getCallOpcode(const MachineFunction &CallerF, bool IsIndirect, bool IsTailCall)
#define Success
SDLoc DL(N)
Simplify Addr given that the top byte of it is ignored by HW during / address translation.
static SDValue GeneratePerfectShuffle(unsigned PFEntry, SDValue LHS, SDValue RHS, SelectionDAG &DAG, const SDLoc &dl)
GeneratePerfectShuffle - Given an entry in the perfect-shuffle table, emit the specified operations t...
return DAG getNode(N->getOpcode(), DL, DAG.getVTList(MVT::Other), Ops)
static bool isSignExtended(SDNode *N, SelectionDAG &DAG)
static const unsigned PerfectShuffleTable[6561+1]
MachineBasicBlock & MBB
unsigned Intr
amdgpu Simplify well known AMD library false FunctionCallee Callee
amdgpu Simplify well known AMD library false FunctionCallee Value * Arg
static std::pair< Register, unsigned > getBaseWithConstantOffset(MachineRegisterInfo &MRI, Register Reg)
This file declares a class to represent arbitrary precision floating point values and provide a varie...
This file implements a class to represent arbitrary precision integral constant values and operations...
static bool isLoad(int Opcode)
static bool isFloatingPointZero(SDValue Op)
isFloatingPointZero - Return true if this is +0.0.
Function Alias Analysis Results
Atomic ordering constants.
SmallVector< MachineOperand, 4 > Cond
static GCRegistry::Add< ShadowStackGC > C("shadow-stack", "Very portable GC for uncooperative code generators")
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
Analysis containing CSE Info
Definition CSEInfo.cpp:26
#define LLVM_FALLTHROUGH
LLVM_FALLTHROUGH - Mark fallthrough cases in switch statements.
Definition Compiler.h:280
This file contains the declarations for the subclasses of Constant, which represent the different fla...
#define LLVM_DEBUG(X)
Definition Debug.h:122
static RegisterPass< DebugifyModulePass > DM("debugify", "Attach debug info to everything")
uint64_t Align
uint64_t Offset
uint64_t Addr
uint32_t Index
uint64_t Size
static GCMetadataPrinterRegistry::Add< ErlangGCPrinter > X("erlang", "erlang-compatible garbage collector")
const HexagonInstrInfo * TII
static SDValue CreateCopyOfByValArgument(SDValue Src, SDValue Dst, SDValue Chain, ISD::ArgFlagsTy Flags, SelectionDAG &DAG, const SDLoc &dl)
CreateCopyOfByValArgument - Make a copy of an aggregate at address specified by "Src" to address "Dst...
IRTranslator LLVM IR MI
#define RegName(no)
lazy value info
#define F(x, y, z)
Definition MD5.cpp:56
#define I(x, y, z)
Definition MD5.cpp:59
#define G(x, y, z)
Definition MD5.cpp:57
unsigned const TargetRegisterInfo * TRI
unsigned Reg
Promote Memory to Register
Definition Mem2Reg.cpp:110
static bool isConstantOrUndef(const SDValue Op)
Module.h This file contains the declarations for the Module class.
static GCMetadataPrinterRegistry::Add< OcamlGCMetadataPrinter > Y("ocaml", "ocaml 3.10-compatible collector")
cl::opt< bool > ANDIGlueBug("expose-ppc-andi-glue-bug", cl::desc("expose the ANDI glue bug on PPC"), cl::Hidden)
static SDValue getCanonicalConstSplat(uint64_t Val, unsigned SplatSize, EVT VT, SelectionDAG &DAG, const SDLoc &dl)
getCanonicalConstSplat - Build a canonical splat immediate of Val with an element size of SplatSize.
static bool needStackSlotPassParameters(const PPCSubtarget &Subtarget, const SmallVectorImpl< ISD::OutputArg > &Outs)
static void fixupShuffleMaskForPermutedSToV(SmallVectorImpl< int > &ShuffV, int LHSMaxIdx, int RHSMinIdx, int RHSMaxIdx, int HalfVec)
static bool isAlternatingShuffMask(const ArrayRef< int > &Mask, int NumElts)
static SDValue addShuffleForVecExtend(SDNode *N, SelectionDAG &DAG, SDValue Input, uint64_t Elems, uint64_t CorrectElems)
static cl::opt< bool > DisablePPCUnaligned("disable-ppc-unaligned", cl::desc("disable unaligned load/store generation on PPC"), cl::Hidden)
static SDValue combineADDToADDZE(SDNode *N, SelectionDAG &DAG, const PPCSubtarget &Subtarget)
static bool findConsecutiveLoad(LoadSDNode *LD, SelectionDAG &DAG)
static SDValue generateEquivalentSub(SDNode *N, int Size, bool Complement, bool Swap, SDLoc &DL, SelectionDAG &DAG)
This function is called when we have proved that a SETCC node can be replaced by subtraction (and oth...
static unsigned mapArgRegToOffsetAIX(unsigned Reg, const PPCFrameLowering *FL)
static bool callsShareTOCBase(const Function *Caller, SDValue Callee, const TargetMachine &TM)
static SDValue combineADDToMAT_PCREL_ADDR(SDNode *N, SelectionDAG &DAG, const PPCSubtarget &Subtarget)
static bool isTOCSaveRestoreRequired(const PPCSubtarget &Subtarget)
static bool isFunctionGlobalAddress(SDValue Callee)
static void CalculateTailCallArgDest(SelectionDAG &DAG, MachineFunction &MF, bool isPPC64, SDValue Arg, int SPDiff, unsigned ArgOffset, SmallVectorImpl< TailCallArgumentInfo > &TailCallArguments)
CalculateTailCallArgDest - Remember Argument for later processing.
static cl::opt< bool > EnableSoftFP128("enable-soft-fp128", cl::desc("temp option to enable soft fp128"), cl::Hidden)
static void LowerMemOpCallTo(SelectionDAG &DAG, MachineFunction &MF, SDValue Chain, SDValue Arg, SDValue PtrOff, int SPDiff, unsigned ArgOffset, bool isPPC64, bool isTailCall, bool isVector, SmallVectorImpl< SDValue > &MemOpChains, SmallVectorImpl< TailCallArgumentInfo > &TailCallArguments, const SDLoc &dl)
LowerMemOpCallTo - Store the argument to the stack or remember it in case of tail calls.
static bool areCallingConvEligibleForTCO_64SVR4(CallingConv::ID CallerCC, CallingConv::ID CalleeCC)
static const MCPhysReg FPR[]
FPR - The set of FP registers that should be allocated for arguments on Darwin and AIX.
static SDNode * isBLACompatibleAddress(SDValue Op, SelectionDAG &DAG)
isCallCompatibleAddress - Return the immediate to use if the specified 32-bit value is representable ...
static Align CalculateStackSlotAlignment(EVT ArgVT, EVT OrigVT, ISD::ArgFlagsTy Flags, unsigned PtrByteSize)
CalculateStackSlotAlignment - Calculates the alignment of this argument on the stack.
static bool haveEfficientBuildVectorPattern(BuildVectorSDNode *V, bool HasDirectMove, bool HasP8Vector)
Do we have an efficient pattern in a .td file for this node?
static void setUsesTOCBasePtr(MachineFunction &MF)
static SDValue transformCallee(const SDValue &Callee, SelectionDAG &DAG, const SDLoc &dl, const PPCSubtarget &Subtarget)
static bool CC_AIX(unsigned ValNo, MVT ValVT, MVT LocVT, CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags, CCState &State)
static unsigned EnsureStackAlignment(const PPCFrameLowering *Lowering, unsigned NumBytes)
EnsureStackAlignment - Round stack frame size up from NumBytes to ensure minimum alignment required f...
static SDValue stripModuloOnShift(const TargetLowering &TLI, SDNode *N, SelectionDAG &DAG)
static bool hasSameArgumentList(const Function *CallerFn, const CallBase &CB)
static bool isFPExtLoad(SDValue Op)
static SDValue BuildIntrinsicOp(unsigned IID, SDValue Op, SelectionDAG &DAG, const SDLoc &dl, EVT DestVT=MVT::Other)
BuildIntrinsicOp - Return a unary operator intrinsic node with the specified intrinsic ID.
static bool isConsecutiveLSLoc(SDValue Loc, EVT VT, LSBaseSDNode *Base, unsigned Bytes, int Dist, SelectionDAG &DAG)
static void StoreTailCallArgumentsToStackSlot(SelectionDAG &DAG, SDValue Chain, const SmallVectorImpl< TailCallArgumentInfo > &TailCallArgs, SmallVectorImpl< SDValue > &MemOpChains, const SDLoc &dl)
StoreTailCallArgumentsToStackSlot - Stores arguments to their stack slot.
static cl::opt< bool > UseAbsoluteJumpTables("ppc-use-absolute-jumptables", cl::desc("use absolute jump tables on ppc"), cl::Hidden)
static void getMaxByValAlign(Type *Ty, Align &MaxAlign, Align MaxMaxAlign)
getMaxByValAlign - Helper for getByValTypeAlignment to determine the desired ByVal argument alignment...
static bool isConsecutiveLS(SDNode *N, LSBaseSDNode *Base, unsigned Bytes, int Dist, SelectionDAG &DAG)
static bool isVMerge(ShuffleVectorSDNode *N, unsigned UnitSize, unsigned LHSStart, unsigned RHSStart)
isVMerge - Common function, used to match vmrg* shuffles.
static void getLabelAccessInfo(bool IsPIC, const PPCSubtarget &Subtarget, unsigned &HiOpFlags, unsigned &LoOpFlags, const GlobalValue *GV=nullptr)
Return true if we should reference labels using a PICBase, set the HiOpFlags and LoOpFlags to the tar...
static void buildCallOperands(SmallVectorImpl< SDValue > &Ops, PPCTargetLowering::CallFlags CFlags, const SDLoc &dl, SelectionDAG &DAG, SmallVector< std::pair< unsigned, SDValue >, 8 > &RegsToPass, SDValue Glue, SDValue Chain, SDValue &Callee, int SPDiff, const PPCSubtarget &Subtarget)
static cl::opt< bool > DisableInnermostLoopAlign32("disable-ppc-innermost-loop-align32", cl::desc("don't always align innermost loop to 32 bytes on ppc"), cl::Hidden)
static bool usePartialVectorLoads(SDNode *N, const PPCSubtarget &ST)
Returns true if we should use a direct load into vector instruction (such as lxsd or lfd),...
static cl::opt< bool > DisableSCO("disable-ppc-sco", cl::desc("disable sibling call optimization on ppc"), cl::Hidden)
static void PrepareTailCall(SelectionDAG &DAG, SDValue &InFlag, SDValue &Chain, const SDLoc &dl, int SPDiff, unsigned NumBytes, SDValue LROp, SDValue FPOp, SmallVectorImpl< TailCallArgumentInfo > &TailCallArguments)
static void fixupFuncForFI(SelectionDAG &DAG, int FrameIdx, EVT VT)
static cl::opt< bool > DisablePPCPreinc("disable-ppc-preinc", cl::desc("disable preincrement load/store generation on PPC"), cl::Hidden)
static SDValue convertFPToInt(SDValue Op, SelectionDAG &DAG, const PPCSubtarget &Subtarget)
static unsigned CalculateStackSlotSize(EVT ArgVT, ISD::ArgFlagsTy Flags, unsigned PtrByteSize)
CalculateStackSlotSize - Calculates the size reserved for this argument on the stack.
static SDValue getSToVPermuted(SDValue OrigSToV, SelectionDAG &DAG)
static int CalculateTailCallSPDiff(SelectionDAG &DAG, bool isTailCall, unsigned ParamSize)
CalculateTailCallSPDiff - Get the amount the stack pointer has to be adjusted to accommodate the argu...
static void prepareIndirectCall(SelectionDAG &DAG, SDValue &Callee, SDValue &Glue, SDValue &Chain, const SDLoc &dl)
static SDValue LowerLabelRef(SDValue HiPart, SDValue LoPart, bool isPIC, SelectionDAG &DAG)
static SDValue isScalarToVec(SDValue Op)
static SDValue widenVec(SelectionDAG &DAG, SDValue Vec, const SDLoc &dl)
static bool getVectorCompareInfo(SDValue Intrin, int &CompareOpc, bool &isDot, const PPCSubtarget &Subtarget)
getVectorCompareInfo - Given an intrinsic, return false if it is not a vector comparison.
static unsigned invertFMAOpcode(unsigned Opc)
static Instruction * callIntrinsic(IRBuilder<> &Builder, Intrinsic::ID Id)
static const SDValue * getNormalLoadInput(const SDValue &Op, bool &IsPermuted)
static SDValue convertIntToFP(SDValue Op, SDValue Src, SelectionDAG &DAG, const PPCSubtarget &Subtarget, SDValue Chain=SDValue())
static int getEstimateRefinementSteps(EVT VT, const PPCSubtarget &Subtarget)
static SDValue EmitTailCallStoreFPAndRetAddr(SelectionDAG &DAG, SDValue Chain, SDValue OldRetAddr, SDValue OldFP, int SPDiff, const SDLoc &dl)
EmitTailCallStoreFPAndRetAddr - Move the frame pointer and return address to the appropriate stack sl...
static SDValue BuildVSLDOI(SDValue LHS, SDValue RHS, unsigned Amt, EVT VT, SelectionDAG &DAG, const SDLoc &dl)
BuildVSLDOI - Return a VECTOR_SHUFFLE that is a vsldoi of the specified amount.
static SDValue combineBVZEXTLOAD(SDNode *N, SelectionDAG &DAG)
static SDValue truncateScalarIntegerArg(ISD::ArgFlagsTy Flags, EVT ValVT, SelectionDAG &DAG, SDValue ArgValue, MVT LocVT, const SDLoc &dl)
static const TargetRegisterClass * getRegClassForSVT(MVT::SimpleValueType SVT, bool IsPPC64)
cl::opt< bool > ANDIGlueBug
static SDValue getOutputChainFromCallSeq(SDValue CallSeqStart)
static bool CalculateStackSlotUsed(EVT ArgVT, EVT OrigVT, ISD::ArgFlagsTy Flags, unsigned PtrByteSize, unsigned LinkageSize, unsigned ParamAreaSize, unsigned &ArgOffset, unsigned &AvailableFPRs, unsigned &AvailableVRs)
CalculateStackSlotUsed - Return whether this argument will use its stack slot (instead of being passe...
static unsigned getPPCStrictOpcode(unsigned Opc)
static void prepareDescriptorIndirectCall(SelectionDAG &DAG, SDValue &Callee, SDValue &Glue, SDValue &Chain, SDValue CallSeqStart, const CallBase *CB, const SDLoc &dl, bool hasNest, const PPCSubtarget &Subtarget)
static bool isXXBRShuffleMaskHelper(ShuffleVectorSDNode *N, int Width)
static bool isSplatBV(SDValue Op)
static SDValue combineBVOfVecSExt(SDNode *N, SelectionDAG &DAG)
static cl::opt< bool > DisableILPPref("disable-ppc-ilp-pref", cl::desc("disable setting the node scheduling preference to ILP on PPC"), cl::Hidden)
static bool isNByteElemShuffleMask(ShuffleVectorSDNode *, unsigned, int)
Check that the mask is shuffling N byte elements.
static SDValue combineBVOfConsecutiveLoads(SDNode *N, SelectionDAG &DAG)
Reduce the number of loads when building a vector.
static bool isValidPCRelNode(SDValue N)
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
static bool isSplat(ArrayRef< Value * > VL)
Shadow Stack GC Lowering
static bool Enabled
Definition Statistic.cpp:50
#define STATISTIC(VARNAME, DESC)
Definition Statistic.h:169
This file describes how to lower LLVM code to machine code.
This defines the Use class.
static Optional< unsigned > getOpcode(ArrayRef< VPValue * > Values)
Returns the opcode of Values or ~0 if they do not all agree.
Definition VPlanSLP.cpp:197
static bool contains(SmallPtrSetImpl< ConstantExpr * > &Cache, ConstantExpr *Expr, Constant *C)
Definition Value.cpp:458
static bool is64Bit(const char *name)
opStatus convert(const fltSemantics &ToSemantics, roundingMode RM, bool *losesInfo)
Definition APFloat.cpp:4817
bool isDenormal() const
Definition APFloat.h:1207
APInt bitcastToAPInt() const
Definition APFloat.h:1133
bool isPosZero() const
Definition APFloat.h:1217
Class for arbitrary precision integers.
Definition APInt.h:70
void clearBit(unsigned BitPosition)
Set a given bit to 0.
Definition APInt.h:1525
APInt zext(unsigned width) const
Zero extend to a new width.
Definition APInt.cpp:930
uint64_t getZExtValue() const
Get zero extended value.
Definition APInt.h:1631
void setBit(unsigned BitPosition)
Set a given bit to 1.
Definition APInt.h:1442
APInt abs() const
Get the absolute value;.
Definition APInt.h:1868
static APInt getAllOnesValue(unsigned numBits)
Get the all-ones value.
Definition APInt.h:567
bool isNegative() const
Determine sign of this APInt.
Definition APInt.h:364
bool getBoolValue() const
Convert APInt to a boolean value.
Definition APInt.h:483
double bitsToDouble() const
Converts APInt bits to a double.
Definition APInt.h:1782
bool isPowerOf2() const
Check if this APInt's value is a power of two greater than zero.
Definition APInt.h:469
static APInt getLowBitsSet(unsigned numBits, unsigned loBitsSet)
Get a value with low bits set.
Definition APInt.h:667
static APInt getHighBitsSet(unsigned numBits, unsigned hiBitsSet)
Get a value with high bits set.
Definition APInt.h:655
This class represents an incoming formal argument to a Function.
Definition Argument.h:29
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
This class holds the attributes for a function, its return value, and its parameters.
Definition Attributes.h:365
StringRef getValueAsString() const
Return the attribute's value as a string.
LLVM Basic Block Representation.
Definition BasicBlock.h:59
const Function * getParent() const
Return the enclosing method, or null if none.
Definition BasicBlock.h:107
const BlockAddress * getBlockAddress() const
The address of a basic block.
Definition Constants.h:851
static BranchProbability getOne()
static BranchProbability getZero()
A "pseudo-class" with methods for operating on BUILD_VECTORs.
bool isConstantSplat(APInt &SplatValue, APInt &SplatUndef, unsigned &SplatBitSize, bool &HasAnyUndefs, unsigned MinSplatBits=0, bool isBigEndian=false) const
Check if this is a constant splat, and if so, find the smallest element size that splats the vector.
CCState - This class holds information needed while lowering arguments and return values.
CCValAssign - Represent assignment of one arg/retval to a location.
unsigned getLocMemOffset() const
Register getLocReg() const
LocInfo getLocInfo() const
static CCValAssign getReg(unsigned ValNo, MVT ValVT, unsigned RegNo, MVT LocVT, LocInfo HTP)
static CCValAssign getCustomMem(unsigned ValNo, MVT ValVT, unsigned Offset, MVT LocVT, LocInfo HTP)
bool needsCustom() const
static CCValAssign getMem(unsigned ValNo, MVT ValVT, unsigned Offset, MVT LocVT, LocInfo HTP)
static CCValAssign getCustomReg(unsigned ValNo, MVT ValVT, unsigned RegNo, MVT LocVT, LocInfo HTP)
unsigned getValNo() const
Base class for all callable instructions (InvokeInst and CallInst) Holds everything related to callin...
Function * getCalledFunction() const
Returns the function called, or null if this is an indirect function invocation.
CallingConv::ID getCallingConv() const
User::op_iterator arg_begin()
Return the iterator pointing to the beginning of the argument list.
bool isMustTailCall() const
Tests if this call site must be tail call optimized.
Value * getCalledOperand() const
User::op_iterator arg_end()
Return the iterator pointing to the end of the argument list.
unsigned arg_size() const
This class represents a function call, abstracting a target machine's calling convention.
bool isTailCall() const
ConstantFP - Floating Point Values [float, double].
Definition Constants.h:273
This is the shared class of boolean and integer constants.
Definition Constants.h:77
uint64_t getZExtValue() const
const APInt & getAPIntValue() const
int64_t getSExtValue() const
This is an important base class in LLVM.
Definition Constant.h:41
A parsed version of the target data layout string in and methods for querying it.
Definition DataLayout.h:111
bool isLittleEndian() const
Layout endianness...
Definition DataLayout.h:240
unsigned getLargestLegalIntTypeSizeInBits() const
Returns the size of largest legal integer type size, or 0 if none are set.
IntegerType * getIntPtrType(LLVMContext &C, unsigned AddressSpace=0) const
Returns an integer type with size at least as big as that of a pointer in the given address space.
Align getABITypeAlign(Type *Ty) const
Returns the minimum ABI-required alignment for the specified type.
A debug info location.
Definition DebugLoc.h:33
iterator find(const_arg_type_t< KeyT > Val)
Definition DenseMap.h:150
std::pair< iterator, bool > insert(const std::pair< KeyT, ValueT > &KV)
Definition DenseMap.h:207
This is a fast-path instruction selection class that generates poor code and doesn't support illegal ...
Definition FastISel.h:65
FunctionLoweringInfo - This contains information that is global to a function that is used when lower...
bool hasOptSize() const
Optimize this function for size (-Os) or minimum size (-Oz).
Definition Function.h:685
Attribute getFnAttribute(Attribute::AttrKind Kind) const
Return the attribute for the given attribute kind.
Definition Function.h:355
bool hasMinSize() const
Optimize this function for minimum size (-Oz).
Definition Function.h:682
CallingConv::ID getCallingConv() const
getCallingConv()/setCallingConv(CC) - These method get and set the calling convention of this functio...
Definition Function.h:228
const Function & getFunction() const
Definition Function.h:135
arg_iterator arg_begin()
Definition Function.h:762
LLVMContext & getContext() const
getContext - Return a reference to the LLVMContext associated with this function.
Definition Function.cpp:298
bool hasFnAttribute(Attribute::AttrKind Kind) const
Return true if the function has the attribute.
Definition Function.h:345
size_t arg_size() const
Definition Function.h:795
const GlobalValue * getGlobal() const
const GlobalObject * getBaseObject() const
Definition Globals.cpp:467
StringRef getSection() const
Definition Globals.cpp:162
Module * getParent()
Get the module that this global value is contained inside of...
bool isStrongDefinitionForLinker() const
Returns true if this global's definition will be the one chosen by the linker.
bool hasComdat() const
This provides a uniform API for creating instructions and inserting them into a basic block: either a...
Definition IRBuilder.h:2673
static unsigned getNumOperandRegisters(unsigned Flag)
getNumOperandRegisters - Extract the number of registers field from the inline asm operand flag.
Definition InlineAsm.h:337
static unsigned getKind(unsigned Flags)
Definition InlineAsm.h:326
const BasicBlock * getParent() const
Definition Instruction.h:94
bool hasAtomicLoad() const
Return true if this atomic instruction loads from memory.
static LLT scalar(unsigned SizeInBits)
Get a low-level scalar or aggregate "bag of bits".
This is an important class for using LLVM in a threaded context.
Definition LLVMContext.h:68
Base class for LoadSDNode and StoreSDNode.
An instruction for reading from memory.
bool isUnordered() const
This class is used to represent ISD::LOAD nodes.
const SDValue & getBasePtr() const
ISD::LoadExtType getExtensionType() const
Return whether this is a plain node, or one of the varieties of value-extending loads.
const std::vector< LoopT * > & getSubLoops() const
Return the loops contained entirely within this loop.
Definition LoopInfo.h:143
unsigned getLoopDepth() const
Return the nesting level of this loop.
Definition LoopInfo.h:96
block_iterator block_end() const
Definition LoopInfo.h:177
block_iterator block_begin() const
Definition LoopInfo.h:176
Context object for machine code objects.
Definition MCContext.h:68
Base class for the full range of assembler expressions which are needed for parsing.
Definition MCExpr.h:35
Wrapper class representing physical registers. Should be passed by value.
Definition MCRegister.h:22
MCSymbolXCOFF * getQualNameSymbol() const
static const MCSymbolRefExpr * create(const MCSymbol *Symbol, MCContext &Ctx)
Definition MCExpr.h:381
Machine Value Type.
static mvt_range fixedlen_vector_valuetypes()
SimpleValueType SimpleTy
uint64_t getScalarSizeInBits() const
@ INVALID_SIMPLE_VALUE_TYPE
unsigned getVectorNumElements() const
bool isVector() const
Return true if this is a vector value type.
bool isInteger() const
Return true if this is an integer or a vector integer type.
static mvt_range integer_valuetypes()
TypeSize getSizeInBits() const
Returns the size of the specified MVT in bits.
uint64_t getFixedSizeInBits() const
Return the size of the specified fixed width value type in bits.
TypeSize getStoreSize() const
Return the number of bytes overwritten by a store of the specified value type.
bool isScalarInteger() const
Return true if this is an integer, not including vectors.
bool isFloatingPoint() const
Return true if this is a FP or a vector FP type.
static MVT getIntegerVT(unsigned BitWidth)
static mvt_range fp_valuetypes()
void transferSuccessorsAndUpdatePHIs(MachineBasicBlock *FromMBB)
Transfers all the successors, as in transferSuccessors, and update PHI operands in the successor bloc...
instr_iterator insert(instr_iterator I, MachineInstr *M)
Insert MI into the instruction list before I, possibly inside a bundle.
const BasicBlock * getBasicBlock() const
Return the LLVM basic block that this instance corresponded to originally.
void addSuccessor(MachineBasicBlock *Succ, BranchProbability Prob=BranchProbability::getUnknown())
Add Succ as a successor of this MachineBasicBlock.
const MachineFunction * getParent() const
Return the MachineFunction containing this basic block.
void splice(iterator Where, MachineBasicBlock *Other, iterator From)
Take an instruction from MBB 'Other' at the position From, and insert it into this MBB right before '...
The MachineFrameInfo class represents an abstract stack frame until prolog/epilog code is inserted.
int CreateFixedObject(uint64_t Size, int64_t SPOffset, bool IsImmutable, bool isAliased=false)
Create a new object at a fixed location on the stack.
int CreateStackObject(uint64_t Size, Align Alignment, bool isSpillSlot, const AllocaInst *Alloca=nullptr, uint8_t ID=0)
Create a new statically sized stack object, returning a nonnegative identifier to represent it.
void setFrameAddressIsTaken(bool T)
void setReturnAddressIsTaken(bool s)
Align getObjectAlign(int ObjectIdx) const
Return the alignment of the specified stack object.
int64_t getObjectSize(int ObjectIdx) const
Return the size of the specified object.
bool hasVAStart() const
Returns true if the function calls the llvm.va_start intrinsic.
int64_t getObjectOffset(int ObjectIdx) const
Return the assigned stack offset of the specified object from the incoming stack pointer.
MachineMemOperand * getMachineMemOperand(MachinePointerInfo PtrInfo, MachineMemOperand::Flags f, uint64_t s, Align base_alignment, const AAMDNodes &AAInfo=AAMDNodes(), const MDNode *Ranges=nullptr, SyncScope::ID SSID=SyncScope::System, AtomicOrdering Ordering=AtomicOrdering::NotAtomic, AtomicOrdering FailureOrdering=AtomicOrdering::NotAtomic)
getMachineMemOperand - Allocate a new MachineMemOperand.
MachineBasicBlock * CreateMachineBasicBlock(const BasicBlock *bb=nullptr)
CreateMachineInstr - Allocate a new MachineInstr.
MCSymbol * getPICBaseSymbol() const
getPICBaseSymbol - Return a function-local symbol to represent the PIC base.
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
StringRef getName() const
getName - Return the name of the corresponding LLVM function.
MachineFrameInfo & getFrameInfo()
getFrameInfo - Return the frame info object for the current function.
MachineRegisterInfo & getRegInfo()
getRegInfo - Return information about the registers currently in use.
const DataLayout & getDataLayout() const
Return the DataLayout attached to the Module associated to this MF.
Function & getFunction()
Return the LLVM function that this machine code represents.
MachineModuleInfo & getMMI() const
Ty * getInfo()
getInfo - Keep track of various per-function pieces of information for backends that would like to do...
Register addLiveIn(MCRegister PReg, const TargetRegisterClass *RC)
addLiveIn - Add the specified physical register as a live-in value and create a corresponding virtual...
void insert(iterator MBBI, MachineBasicBlock *MBB)
const MachineInstrBuilder & setMIFlag(MachineInstr::MIFlag Flag) const
const MachineInstrBuilder & addImm(int64_t Val) const
Add a new immediate operand.
const MachineInstrBuilder & add(const MachineOperand &MO) const
const MachineInstrBuilder & addFrameIndex(int Idx) const
const MachineInstrBuilder & addRegMask(const uint32_t *Mask) const
const MachineInstrBuilder & addReg(Register RegNo, unsigned flags=0, unsigned SubReg=0) const
Add a new virtual register operand.
const MachineInstrBuilder & addMBB(MachineBasicBlock *MBB, unsigned TargetFlags=0) const
const MachineInstrBuilder & cloneMemRefs(const MachineInstr &OtherMI) const
const MachineInstrBuilder & addMemOperand(MachineMemOperand *MMO) const
const MachineInstrBuilder & addDef(Register RegNo, unsigned Flags=0, unsigned SubReg=0) const
Add a virtual register definition operand.
Representation of each machine instruction.
@ EK_LabelDifference32
EK_LabelDifference32 - Each entry is the address of the block minus the address of the jump table.
A description of a memory reference used in the backend.
uint64_t getSize() const
Return the size in bytes of the memory reference.
Flags
Flags values. These may be or'd together.
@ MODereferenceable
The memory access is dereferenceable (i.e., doesn't trap).
@ MOLoad
The memory access reads data.
@ MOInvariant
The memory access always returns the same value (or traps).
@ MOStore
The memory access writes data.
Flags getFlags() const
Return the raw flags of the source value,.
Align getAlign() const
Return the minimum known alignment in bytes of the actual memory reference.
const MCContext & getContext() const
MachineOperand class - Representation of each machine instruction operand.
static MachineOperand CreateImm(int64_t Val)
static MachineOperand CreateReg(Register Reg, bool isDef, bool isImp=false, bool isKill=false, bool isDead=false, bool isUndef=false, bool isEarlyClobber=false, unsigned SubReg=0, bool isDebug=false, bool isInternalRead=false, bool isRenamable=false)
MachineRegisterInfo - Keep track of information for virtual and physical registers,...
Register getLiveInVirtReg(MCRegister PReg) const
getLiveInVirtReg - If PReg is a live-in physical register, return the corresponding live-in physical ...
This SDNode is used for target intrinsics that touch memory and need an associated MachineMemOperand.
This is an abstract virtual class for memory operations.
Align getAlign() const
AAMDNodes getAAInfo() const
Returns the AA info that describes the dereference.
MachineMemOperand * getMemOperand() const
Return a MachineMemOperand object describing the memory reference performed by operation.
const SDValue & getBasePtr() const
const MachinePointerInfo & getPointerInfo() const
const SDValue & getChain() const
unsigned getAlignment() const
EVT getMemoryVT() const
Return the type of the in-memory value.
A Module instance is used to store all the information related to an LLVM module.
Definition Module.h:67
PICLevel::Level getPICLevel() const
Returns the PIC level (small or large model)
Definition Module.cpp:532
unsigned getTOCSaveOffset() const
getTOCSaveOffset - Return the previous frame offset to save the TOC register – 64-bit SVR4 ABI only.
unsigned getLinkageSize() const
getLinkageSize - Return the size of the PowerPC ABI linkage area.
unsigned getFramePointerSaveOffset() const
getFramePointerSaveOffset - Return the previous frame offset to save the frame pointer.
unsigned getReturnSaveOffset() const
getReturnSaveOffset - Return the previous frame offset to save the return address.
PPCFunctionInfo - This class is derived from MachineFunction private PowerPC target-specific informat...
void setVarArgsNumFPR(unsigned Num)
void setVarArgsNumGPR(unsigned Num)
void appendParameterType(ParamType Type)
void setMinReservedArea(unsigned size)
unsigned getMinReservedArea() const
void setVarArgsStackOffset(int Offset)
void addLiveInAttr(Register VReg, ISD::ArgFlagsTy Flags)
This function associates attributes for each live-in virtual register.
bool useLongCalls() const
bool hasFRSQRTE() const
bool is32BitELFABI() const
bool hasMMA() const
unsigned descriptorTOCAnchorOffset() const
bool hasFPCVT() const
bool isAIXABI() const
bool useSoftFloat() const
bool use64BitRegs() const
use64BitRegs - Return true if in 64-bit mode or if we should use 64-bit registers in 32-bit mode when...
bool hasAltivec() const
bool allowsUnalignedFPAccess() const
const PPCFrameLowering * getFrameLowering() const override
bool needsSwapsForVSXMemOps() const
bool isPPC64() const
isPPC64 - Return true if we are generating code for 64-bit pointer mode.
bool needsTwoConstNR() const
bool isUsingPCRelativeCalls() const
bool usesFunctionDescriptors() const
True if the ABI is descriptor based.
bool hasFSQRT() const
bool hasP9Vector() const
bool hasFRE() const
bool hasFRSQRTES() const
MCRegister getEnvironmentPointerRegister() const
const PPCInstrInfo * getInstrInfo() const override
bool hasFPU() const
bool useCRBits() const
useCRBits - Return true if we should store and manipulate i1 values in the individual condition regis...
bool hasRecipPrec() const
bool hasSTFIWX() const
bool isSVR4ABI() const
bool hasInvariantFunctionDescriptors() const
unsigned getCPUDirective() const
getCPUDirective - Returns the -m directive specified for the cpu.
POPCNTDKind hasPOPCNTD() const
bool hasEFPU2() const
bool hasPrefixInstrs() const
bool hasPartwordAtomics() const
bool hasSPE() const
bool hasLFIWAX() const
bool isLittleEndian() const
bool hasFCPSGN() const
bool isTargetLinux() const
bool hasP9Altivec() const
MCRegister getTOCPointerRegister() const
MCRegister getStackPointerRegister() const
bool has64BitSupport() const
has64BitSupport - Return true if the selected CPU supports 64-bit instructions, regardless of whether...
bool is64BitELFABI() const
bool hasFPRND() const
bool isELFv2ABI() const
bool hasP8Vector() const
bool pairedVectorMemops() const
const PPCTargetMachine & getTargetMachine() const
bool isPredictableSelectIsExpensive() const
bool enableMachineScheduler() const override
Scheduling customization.
bool hasFRES() const
bool isISA3_1() const
bool hasLDBRX() const
const PPCRegisterInfo * getRegisterInfo() const override
bool isGVIndirectSymbol(const GlobalValue *GV) const
True if the GV will be accessed via an indirect symbol.
unsigned descriptorEnvironmentPointerOffset() const
bool isISA3_0() const
bool hasVSX() const
bool hasDirectMove() const
bool hasP8Altivec() const
MachineBasicBlock * emitEHSjLjLongJmp(MachineInstr &MI, MachineBasicBlock *MBB) const
unsigned getStackProbeSize(MachineFunction &MF) const
bool isTruncateFree(Type *Ty1, Type *Ty2) const override
isTruncateFree - Return true if it's free to truncate a value of type Ty1 to type Ty2.
MachineBasicBlock * EmitInstrWithCustomInserter(MachineInstr &MI, MachineBasicBlock *MBB) const override
This method should be implemented by targets that mark instructions with the 'usesCustomInserter' fla...
bool isFPExtFree(EVT DestVT, EVT SrcVT) const override
Return true if an fpext operation is free (for instance, because single-precision floating-point numb...
MachineBasicBlock * emitEHSjLjSetJmp(MachineInstr &MI, MachineBasicBlock *MBB) const
const char * getTargetNodeName(unsigned Opcode) const override
getTargetNodeName() - This method returns the name of a target specific DAG node.
bool isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const override
Return true if folding a constant offset with the given GlobalAddress is legal.
MachineBasicBlock * emitProbedAlloca(MachineInstr &MI, MachineBasicBlock *MBB) const
bool isZExtFree(SDValue Val, EVT VT2) const override
Return true if zero-extending the specific node Val to type VT2 is free (either because it's implicit...
MachineBasicBlock * EmitPartwordAtomicBinary(MachineInstr &MI, MachineBasicBlock *MBB, bool is8bit, unsigned Opcode, unsigned CmpOpcode=0, unsigned CmpPred=0) const
SDValue getNegatedExpression(SDValue Op, SelectionDAG &DAG, bool LegalOps, bool OptForSize, NegatibleCost &Cost, unsigned Depth=0) const override
Return the newly negated expression if the cost is not expensive and set the cost in Cost to indicate...
bool SelectAddressRegImm(SDValue N, SDValue &Disp, SDValue &Base, SelectionDAG &DAG, MaybeAlign EncodingAlignment) const
SelectAddressRegImm - Returns true if the address N can be represented by a base register plus a sign...
bool getTgtMemIntrinsic(IntrinsicInfo &Info, const CallInst &I, MachineFunction &MF, unsigned Intrinsic) const override
Given an intrinsic, checks if on the target the intrinsic will need to map to a MemIntrinsicNode (tou...
SDValue expandVSXLoadForLE(SDNode *N, DAGCombinerInfo &DCI) const
void ReplaceNodeResults(SDNode *N, SmallVectorImpl< SDValue > &Results, SelectionDAG &DAG) const override
ReplaceNodeResults - Replace the results of node with an illegal result type with new values built ou...
unsigned getByValTypeAlignment(Type *Ty, const DataLayout &DL) const override
getByValTypeAlignment - Return the desired alignment for ByVal aggregate function arguments in the ca...
MachineBasicBlock * EmitAtomicBinary(MachineInstr &MI, MachineBasicBlock *MBB, unsigned AtomicSize, unsigned BinOpcode, unsigned CmpOpcode=0, unsigned CmpPred=0) const
SDValue BuildSDIVPow2(SDNode *N, const APInt &Divisor, SelectionDAG &DAG, SmallVectorImpl< SDNode * > &Created) const override
Targets may override this function to provide custom SDIV lowering for power-of-2 denominators.
void computeKnownBitsForTargetNode(const SDValue Op, KnownBits &Known, const APInt &DemandedElts, const SelectionDAG &DAG, unsigned Depth=0) const override
Determine which of the bits specified in Mask are known to be either zero or one and return them in t...
bool SelectAddressRegRegOnly(SDValue N, SDValue &Base, SDValue &Index, SelectionDAG &DAG) const
SelectAddressRegRegOnly - Given the specified addressed, force it to be represented as an indexed [r+...
bool useSoftFloat() const override
SDValue getPICJumpTableRelocBase(SDValue Table, SelectionDAG &DAG) const override
Returns relocation base for the given PIC jumptable.
void insertSSPDeclarations(Module &M) const override
Inserts necessary declarations for SSP (stack protection) purpose.
ConstraintWeight getSingleConstraintMatchWeight(AsmOperandInfo &info, const char *constraint) const override
Examine constraint string and operand type and determine a weight value.
bool enableAggressiveFMAFusion(EVT VT) const override
Return true if target always beneficiates from combining into FMA for a given value type.
Register getRegisterByName(const char *RegName, LLT VT, const MachineFunction &MF) const override
Return the register ID of the name passed in.
bool decomposeMulByConstant(LLVMContext &Context, EVT VT, SDValue C) const override
Return true if it is profitable to transform an integer multiplication-by-constant into simpler opera...
Instruction * emitLeadingFence(IRBuilder<> &Builder, Instruction *Inst, AtomicOrdering Ord) const override
Inserts in the IR a target-specific intrinsic specifying a fence.
unsigned getJumpTableEncoding() const override
Return the entry encoding for a jump table in the current function.
bool isLegalAddressingMode(const DataLayout &DL, const AddrMode &AM, Type *Ty, unsigned AS, Instruction *I=nullptr) const override
isLegalAddressingMode - Return true if the addressing mode represented by AM is legal for this target...
bool preferIncOfAddToSubOfNot(EVT VT) const override
These two forms are equivalent: sub y, (xor x, -1) add (add x, 1), y The variant with two add's is IR...
bool shouldConvertConstantLoadToIntImm(const APInt &Imm, Type *Ty) const override
Returns true if it is beneficial to convert a load of a constant to just the constant itself.
const MCPhysReg * getScratchRegisters(CallingConv::ID CC) const override
Returns a 0 terminated array of registers that can be safely used as scratch registers.
bool getPreIndexedAddressParts(SDNode *N, SDValue &Base, SDValue &Offset, ISD::MemIndexedMode &AM, SelectionDAG &DAG) const override
getPreIndexedAddressParts - returns true by value, base pointer and offset pointer and addressing mod...
void LowerAsmOperandForConstraint(SDValue Op, std::string &Constraint, std::vector< SDValue > &Ops, SelectionDAG &DAG) const override
LowerAsmOperandForConstraint - Lower the specified operand into the Ops vector.
bool isProfitableToHoist(Instruction *I) const override
isProfitableToHoist - Check if it is profitable to hoist instruction I to its dominator block.
bool isFPImmLegal(const APFloat &Imm, EVT VT, bool ForCodeSize) const override
Returns true if the target can instruction select the specified FP immediate natively.
ConstraintType getConstraintType(StringRef Constraint) const override
getConstraintType - Given a constraint, return the type of constraint it is for this target.
const MCExpr * getPICJumpTableRelocBaseExpr(const MachineFunction *MF, unsigned JTI, MCContext &Ctx) const override
This returns the relocation base for the given PIC jumptable, the same as getPICJumpTableRelocBase,...
EVT getOptimalMemOpType(const MemOp &Op, const AttributeList &FuncAttributes) const override
It returns EVT::Other if the type should be determined using generic target-independent logic.
SDValue PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const override
This method will be invoked for all target nodes and for any target-independent nodes that the target...
bool allowsMisalignedMemoryAccesses(EVT VT, unsigned AddrSpace, unsigned Align=1, MachineMemOperand::Flags Flags=MachineMemOperand::MONone, bool *Fast=nullptr) const override
Is unaligned memory access allowed for the given type, and is it fast relative to software emulation.
SDValue expandVSXStoreForLE(SDNode *N, DAGCombinerInfo &DCI) const
bool useLoadStackGuardNode() const override
Override to support customized stack guard loading.
bool hasInlineStackProbe(MachineFunction &MF) const override
PPCTargetLowering(const PPCTargetMachine &TM, const PPCSubtarget &STI)
bool SelectAddressRegReg(SDValue N, SDValue &Base, SDValue &Index, SelectionDAG &DAG, MaybeAlign EncodingAlignment=None) const
SelectAddressRegReg - Given the specified addressed, check to see if it can be more efficiently repre...
bool isFMAFasterThanFMulAndFAdd(const MachineFunction &MF, EVT VT) const override
isFMAFasterThanFMulAndFAdd - Return true if an FMA operation is faster than a pair of fmul and fadd i...
bool shouldExpandBuildVectorWithShuffles(EVT VT, unsigned DefinedValues) const override
bool SelectAddressRegImm34(SDValue N, SDValue &Disp, SDValue &Base, SelectionDAG &DAG) const
Similar to the 16-bit case but for instructions that take a 34-bit displacement field (prefixed loads...
std::pair< unsigned, const TargetRegisterClass * > getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI, StringRef Constraint, MVT VT) const override
Given a physical register constraint (e.g.
Register getExceptionSelectorRegister(const Constant *PersonalityFn) const override
If a physical register, this returns the register that receives the exception typeid on entry to a la...
bool isJumpTableRelative() const override
Register getExceptionPointerRegister(const Constant *PersonalityFn) const override
If a physical register, this returns the register that receives the exception address on entry to an ...
SDValue LowerOperation(SDValue Op, SelectionDAG &DAG) const override
LowerOperation - Provide custom lowering hooks for some operations.
bool SelectAddressPCRel(SDValue N, SDValue &Base) const
SelectAddressPCRel - Determine whether the specified address can be represented PC-relative, as [pc+imm].
EVT getSetCCResultType(const DataLayout &DL, LLVMContext &Context, EVT VT) const override
getSetCCResultType - Return the ISD::SETCC ValueType
Instruction * emitTrailingFence(IRBuilder<> &Builder, Instruction *Inst, AtomicOrdering Ord) const override
bool SelectAddressEVXRegReg(SDValue N, SDValue &Base, SDValue &Index, SelectionDAG &DAG) const
SelectAddressEVXRegReg - Given the specified address, check to see if it can be more efficiently re...
bool isLegalICmpImmediate(int64_t Imm) const override
isLegalICmpImmediate - Return true if the specified immediate is legal icmp immediate,...
bool isAccessedAsGotIndirect(SDValue N) const
Align getPrefLoopAlignment(MachineLoop *ML) const override
Return the preferred loop alignment.
FastISel * createFastISel(FunctionLoweringInfo &FuncInfo, const TargetLibraryInfo *LibInfo) const override
createFastISel - This method returns a target-specific FastISel object, or null if the target does no...
bool isLegalAddImmediate(int64_t Imm) const override
isLegalAddImmediate - Return true if the specified immediate is legal add immediate,...
Common code between 32-bit and 64-bit PowerPC targets.
Wrapper class representing virtual and physical registers.
Definition Register.h:19
static bool isVirtualRegister(unsigned Reg)
Return true if the specified register number is in the virtual register namespace.
Definition Register.h:71
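A minimal sketch of how these Register helpers are typically used together; the helper and its MachineInstr argument are illustrative assumptions, only the Register API above is taken from the listing.

#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/Register.h"

// Sketch (assumed helper): report whether MI defines any virtual register.
static bool definesVirtualReg(const llvm::MachineInstr &MI) {
  for (const llvm::MachineOperand &MO : MI.operands())
    if (MO.isReg() && MO.isDef() &&
        llvm::Register::isVirtualRegister(MO.getReg()))
      return true;
  return false;
}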
Wrapper class for IR location info (IR ordering and DebugLoc) to be passed into SDNode creation funct...
This class provides iterator support for SDUse operands that use a specific SDNode.
Represents one node in the SelectionDAG.
ArrayRef< SDUse > ops() const
void dump() const
Dump this node, for debugging.
unsigned getOpcode() const
Return the SelectionDAG opcode value for this node.
bool hasOneUse() const
Return true if there is exactly one use of this node.
unsigned getNumValues() const
Return the number of values defined/returned by this operator.
const SDValue & getOperand(unsigned Num) const
use_iterator use_begin() const
Provide iteration support to walk over all uses of an SDNode.
bool isPredecessorOf(const SDNode *N) const
Return true if this node is a predecessor of N.
EVT getValueType(unsigned ResNo) const
Return the type of a specified result.
bool hasNUsesOfValue(unsigned NUses, unsigned Value) const
Return true if there are exactly NUSES uses of the indicated value.
op_iterator op_end() const
const SDNodeFlags getFlags() const
op_iterator op_begin() const
static use_iterator use_end()
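A short sketch of the use-iteration pattern these members enable; the predicate and its premise (every user must be a plain store) are illustrative assumptions, not code from this file.

#include "llvm/CodeGen/SelectionDAGNodes.h"

// Sketch (assumed helper): true iff every user of N is a store node.
static bool allUsersAreStores(const llvm::SDNode *N) {
  for (llvm::SDNode::use_iterator UI = N->use_begin(), E = N->use_end();
       UI != E; ++UI) {
    llvm::SDNode *User = *UI; // the iterator yields the using node
    if (User->getOpcode() != llvm::ISD::STORE)
      return false;
  }
  return true;
}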
Represents a use of a SDNode.
Unlike LLVM values, Selection DAG nodes may return multiple values as the result of a computation.
bool isUndef() const
SDNode * getNode() const
get the SDNode which holds the desired result
bool hasOneUse() const
Return true if there is exactly one node using value ResNo of Node.
SDValue getValue(unsigned R) const
EVT getValueType() const
Return the ValueType of the referenced return value.
const SDValue & getOperand(unsigned i) const
uint64_t getConstantOperandVal(unsigned i) const
MVT getSimpleValueType() const
Return the simple ValueType of the referenced return value.
unsigned getOpcode() const
unsigned getNumOperands() const
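A sketch combining the SDValue accessors above; the pattern-match helper is an assumed name, but getOpcode/getOperand/getConstantOperandVal are used exactly as documented.

#include "llvm/CodeGen/SelectionDAGNodes.h"

// Sketch (assumed helper): match (add x, C) and extract C.
static bool isAddOfConstant(llvm::SDValue V, uint64_t &Imm) {
  if (V.getOpcode() != llvm::ISD::ADD)
    return false;
  if (!llvm::isa<llvm::ConstantSDNode>(V.getOperand(1)))
    return false;
  Imm = V.getConstantOperandVal(1); // asserts operand 1 is a ConstantSDNode
  return true;
}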
static SectionKind getMetadata()
This is used to represent a portion of an LLVM function in a low-level Data Dependence DAG representa...
SDValue getExtLoad(ISD::LoadExtType ExtType, const SDLoc &dl, EVT VT, SDValue Chain, SDValue Ptr, MachinePointerInfo PtrInfo, EVT MemVT, MaybeAlign Alignment=MaybeAlign(), MachineMemOperand::Flags MMOFlags=MachineMemOperand::MONone, const AAMDNodes &AAInfo=AAMDNodes())
SDValue getTargetGlobalAddress(const GlobalValue *GV, const SDLoc &DL, EVT VT, int64_t offset=0, unsigned TargetFlags=0)
SDValue getSelect(const SDLoc &DL, EVT VT, SDValue Cond, SDValue LHS, SDValue RHS)
Helper function to make it easier to build Select's if you just have operands and don't want to check...
SDValue getStackArgumentTokenFactor(SDValue Chain)
Compute a TokenFactor to force all the incoming stack arguments to be loaded from the stack.
const TargetSubtargetInfo & getSubtarget() const
SDValue getMergeValues(ArrayRef< SDValue > Ops, const SDLoc &dl)
Create a MERGE_VALUES node from the given operands.
SDVTList getVTList(EVT VT)
Return an SDVTList that represents the list of values specified.
MachineSDNode * getMachineNode(unsigned Opcode, const SDLoc &dl, EVT VT)
These are used for target selectors to create a new node with specified return type(s),...
SDValue getSetCC(const SDLoc &DL, EVT VT, SDValue LHS, SDValue RHS, ISD::CondCode Cond, SDValue Chain=SDValue(), bool IsSignaling=false)
Helper function to make it easier to build SetCC's if you just have an ISD::CondCode instead of an SD...
SDValue getConstantFP(double Val, const SDLoc &DL, EVT VT, bool isTarget=false)
Create a ConstantFPSDNode wrapping a constant value.
SDValue getTargetConstantPool(const Constant *C, EVT VT, MaybeAlign Align=None, int Offset=0, unsigned TargetFlags=0)
SDValue getLoad(EVT VT, const SDLoc &dl, SDValue Chain, SDValue Ptr, MachinePointerInfo PtrInfo, MaybeAlign Alignment=MaybeAlign(), MachineMemOperand::Flags MMOFlags=MachineMemOperand::MONone, const AAMDNodes &AAInfo=AAMDNodes(), const MDNode *Ranges=nullptr)
Loads are not normal binary operators: their result type is not determined by their operands,...
Align getEVTAlign(EVT MemoryVT) const
Compute the default alignment value for the given type.
void addNoMergeSiteInfo(const SDNode *Node, bool NoMerge)
const TargetLowering & getTargetLoweringInfo() const
static constexpr unsigned MaxRecursionDepth
SDValue getTargetJumpTable(int JTI, EVT VT, unsigned TargetFlags=0)
SDValue getUNDEF(EVT VT)
Return an UNDEF node. UNDEF does not have a useful SDLoc.
SDValue getCALLSEQ_END(SDValue Chain, SDValue Op1, SDValue Op2, SDValue InGlue, const SDLoc &DL)
Return a new CALLSEQ_END node, which always must have a glue result (to ensure it's not CSE'd).
SDValue getBuildVector(EVT VT, const SDLoc &DL, ArrayRef< SDValue > Ops)
Return an ISD::BUILD_VECTOR node.
SDValue getBitcast(EVT VT, SDValue V)
Return a bitcast using the SDLoc of the value operand, and casting to the provided type.
const DataLayout & getDataLayout() const
SDValue getTargetFrameIndex(int FI, EVT VT)
SDValue getTokenFactor(const SDLoc &DL, SmallVectorImpl< SDValue > &Vals)
Creates a new TokenFactor containing Vals.
SDValue getConstant(uint64_t Val, const SDLoc &DL, EVT VT, bool isTarget=false, bool isOpaque=false)
Create a ConstantSDNode wrapping a constant value.
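These constructors are the building blocks of every lowering routine. A hedged sketch, assuming DAG, dl, VT and X come from the caller and K is a power of two; the strength reduction itself is illustrative, not this file's code.

#include "llvm/CodeGen/SelectionDAG.h"
#include "llvm/Support/MathExtras.h"

// Sketch (assumed helper): lower x*K (K a power of two) to an add or shift.
static llvm::SDValue lowerMulByPow2(llvm::SelectionDAG &DAG,
                                    const llvm::SDLoc &dl, llvm::EVT VT,
                                    llvm::SDValue X, uint64_t K) {
  assert(llvm::isPowerOf2_64(K) && "expected a power of two");
  if (K == 2) // x*2 -> x+x
    return DAG.getNode(llvm::ISD::ADD, dl, VT, X, X);
  llvm::EVT ShTy = DAG.getTargetLoweringInfo().getShiftAmountTy(
      VT, DAG.getDataLayout());
  return DAG.getNode(llvm::ISD::SHL, dl, VT, X,
                     DAG.getConstant(llvm::Log2_64(K), dl, ShTy));
}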
SDValue getTruncStore(SDValue Chain, const SDLoc &dl, SDValue Val, SDValue Ptr, MachinePointerInfo PtrInfo, EVT SVT, Align Alignment, MachineMemOperand::Flags MMOFlags=MachineMemOperand::MONone, const AAMDNodes &AAInfo=AAMDNodes())
void ReplaceAllUsesWith(SDValue From, SDValue To)
Modify anything using 'From' to use 'To' instead.
SDValue getCommutedVectorShuffle(const ShuffleVectorSDNode &SV)
Returns an ISD::VECTOR_SHUFFLE node semantically equivalent to the shuffle node in input but with swa...
SDValue getStore(SDValue Chain, const SDLoc &dl, SDValue Val, SDValue Ptr, MachinePointerInfo PtrInfo, Align Alignment, MachineMemOperand::Flags MMOFlags=MachineMemOperand::MONone, const AAMDNodes &AAInfo=AAMDNodes())
Helper function to build ISD::STORE nodes.
bool isSplatValue(SDValue V, const APInt &DemandedElts, APInt &UndefElts, unsigned Depth=0)
Test whether V has a splatted value for all the demanded elements.
SDValue getCALLSEQ_START(SDValue Chain, uint64_t InSize, uint64_t OutSize, const SDLoc &DL)
Return a new CALLSEQ_START node, that starts new call frame, in which InSize bytes are set up inside ...
SDValue getRegister(unsigned Reg, EVT VT)
SDValue getMemcpy(SDValue Chain, const SDLoc &dl, SDValue Dst, SDValue Src, SDValue Size, Align Alignment, bool isVol, bool AlwaysInline, bool isTailCall, MachinePointerInfo DstPtrInfo, MachinePointerInfo SrcPtrInfo)
SDValue getSExtOrTrunc(SDValue Op, const SDLoc &DL, EVT VT)
Convert Op, which must be of integer type, to the integer type VT, by either sign-extending or trunca...
SDValue getBoolExtOrTrunc(SDValue Op, const SDLoc &SL, EVT VT, EVT OpVT)
Convert Op, which must be of integer type, to the integer type VT, by using an extension appropriate ...
SDValue getExternalSymbol(const char *Sym, EVT VT)
const TargetMachine & getTarget() const
SDValue getAnyExtOrTrunc(SDValue Op, const SDLoc &DL, EVT VT)
Convert Op, which must be of integer type, to the integer type VT, by either any-extending or truncat...
SDValue getCopyToReg(SDValue Chain, const SDLoc &dl, unsigned Reg, SDValue N)
SDValue getSelectCC(const SDLoc &DL, SDValue LHS, SDValue RHS, SDValue True, SDValue False, ISD::CondCode Cond)
Helper function to make it easier to build SelectCC's if you just have an ISD::CondCode instead of an...
SDValue getMemIntrinsicNode(unsigned Opcode, const SDLoc &dl, SDVTList VTList, ArrayRef< SDValue > Ops, EVT MemVT, MachinePointerInfo PtrInfo, Align Alignment, MachineMemOperand::Flags Flags=MachineMemOperand::MOLoad|MachineMemOperand::MOStore, uint64_t Size=0, const AAMDNodes &AAInfo=AAMDNodes())
Creates a MemIntrinsicNode that may produce a result and takes a list of operands.
SDValue getIntPtrConstant(uint64_t Val, const SDLoc &DL, bool isTarget=false)
SDValue getValueType(EVT)
SDValue getNode(unsigned Opcode, const SDLoc &DL, EVT VT, ArrayRef< SDUse > Ops)
Gets or creates the specified node.
SDValue getTargetConstant(uint64_t Val, const SDLoc &DL, EVT VT, bool isOpaque=false)
unsigned ComputeNumSignBits(SDValue Op, unsigned Depth=0) const
Return the number of times the sign bit of the register is replicated into the other bits.
SDValue getTargetBlockAddress(const BlockAddress *BA, EVT VT, int64_t Offset=0, unsigned TargetFlags=0)
bool isBaseWithConstantOffset(SDValue Op) const
Return true if the specified operand is an ISD::ADD with a ConstantSDNode on the right-hand side,...
void ReplaceAllUsesOfValueWith(SDValue From, SDValue To)
Replace any uses of From with To, leaving uses of other values produced by From.getNode() alone.
MachineFunction & getMachineFunction() const
SDValue getCopyFromReg(SDValue Chain, const SDLoc &dl, unsigned Reg, EVT VT)
SDValue getSplatBuildVector(EVT VT, const SDLoc &DL, SDValue Op)
Return a splat ISD::BUILD_VECTOR node, consisting of Op splatted to all elements.
SDValue getFrameIndex(int FI, EVT VT, bool isTarget=false)
KnownBits computeKnownBits(SDValue Op, unsigned Depth=0) const
Determine which bits of Op are known to be either zero or one and return them in Known.
SDValue getRegisterMask(const uint32_t *RegMask)
SDValue getZExtOrTrunc(SDValue Op, const SDLoc &DL, EVT VT)
Convert Op, which must be of integer type, to the integer type VT, by either zero-extending or trunca...
bool MaskedValueIsZero(SDValue Op, const APInt &Mask, unsigned Depth=0) const
Return true if 'Op & Mask' is known to be zero.
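A sketch of the usual known-bits query, assuming Op is a value about to be zero-extended from NarrowBits; if the high bits are already provably zero, the extension is a no-op.

#include "llvm/CodeGen/SelectionDAG.h"

// Sketch (assumed helper): are all bits above NarrowBits known zero?
static bool highBitsKnownZero(llvm::SelectionDAG &DAG, llvm::SDValue Op,
                              unsigned NarrowBits) {
  unsigned Bits = Op.getScalarValueSizeInBits();
  llvm::APInt HighMask = llvm::APInt::getHighBitsSet(Bits, Bits - NarrowBits);
  return DAG.MaskedValueIsZero(Op, HighMask);
}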
SDValue getObjectPtrOffset(const SDLoc &SL, SDValue Ptr, TypeSize Offset)
Create an add instruction with appropriate flags when used for addressing some offset of an object.
LLVMContext * getContext() const
SDValue getTargetExternalSymbol(const char *Sym, EVT VT, unsigned TargetFlags=0)
SDValue getMCSymbol(MCSymbol *Sym, EVT VT)
SDValue CreateStackTemporary(TypeSize Bytes, Align Alignment)
Create a stack temporary based on the size in bytes and the alignment.
SDNode * UpdateNodeOperands(SDNode *N, SDValue Op)
Mutate the specified node in-place to have the specified operands.
SDValue getEntryNode() const
Return the token chain corresponding to the entry of the function.
SDValue getVectorShuffle(EVT VT, const SDLoc &dl, SDValue N1, SDValue N2, ArrayRef< int > Mask)
Return an ISD::VECTOR_SHUFFLE node.
This SDNode is used to implement the code generator support for the llvm IR shufflevector instruction...
int getMaskElt(unsigned Idx) const
ArrayRef< int > getMask() const
size_type count(ConstPtrType Ptr) const
count - Return 1 if the specified pointer is in the set, 0 otherwise.
std::pair< iterator, bool > insert(PtrType Ptr)
Inserts Ptr if and only if there is no element in the container equal to Ptr.
SmallPtrSet - This class implements a set which is optimized for holding SmallSize or less elements.
SmallSet - This maintains a set of unique values, optimizing for the case when the set is small (less...
Definition SmallSet.h:134
const_iterator begin() const
Definition SmallSet.h:223
size_type count(const T &V) const
count - Return 1 if the element is in the set, 0 otherwise.
Definition SmallSet.h:164
std::pair< NoneType, bool > insert(const T &V)
insert - Insert an element into the set if it isn't already there.
Definition SmallSet.h:180
const_iterator end() const
Definition SmallSet.h:229
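A minimal sketch of the insert/count idiom these members support; the worklist framing is illustrative.

#include "llvm/ADT/SmallSet.h"

// Sketch: insert().second is true only on first insertion, which makes
// de-duplicating a worklist a one-liner.
static void visitOnce(llvm::SmallSet<int, 8> &Seen, int Item) {
  if (Seen.insert(Item).second) {
    // first visit of Item: process it here
  }
}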
LLVM_NODISCARD bool empty() const
Definition SmallVector.h:73
size_t size() const
Definition SmallVector.h:70
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
LLVM_NODISCARD T pop_back_val()
void push_back(const T &Elt)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
Definition LoopUtils.h:52
StackOffset is a class to represent an offset with 2 dimensions, named fixed and scalable,...
Definition TypeSize.h:130
This class is used to represent ISD::STORE nodes.
const SDValue & getBasePtr() const
const SDValue & getValue() const
StringRef - Represent a constant reference to a string, i.e.
Definition StringRef.h:57
std::enable_if_t< std::numeric_limits< T >::is_signed, bool > getAsInteger(unsigned Radix, T &Result) const
Parse the current string as an integer of the specified radix.
Definition StringRef.h:511
LLVM_NODISCARD size_t size() const
size - Get the string size.
Definition StringRef.h:160
LLVM_NODISCARD const char * data() const
data - Get a pointer to the start of the string (which may not be null terminated).
Definition StringRef.h:152
A switch()-like statement whose cases are string literals.
LLVM_NODISCARD R Default(T Value)
StringSwitch & Case(StringLiteral S, T Value)
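The usual Case/Default chain, loosely modeled on what a getConstraintType-style hook does; the letter-to-kind mapping below is an illustrative assumption, not PPC's actual one.

#include "llvm/ADT/StringRef.h"
#include "llvm/ADT/StringSwitch.h"

// Sketch (assumed mapping): classify an inline-asm constraint letter.
static unsigned classifyConstraint(llvm::StringRef C) {
  return llvm::StringSwitch<unsigned>(C)
      .Case("r", 1) // register
      .Case("m", 2) // memory
      .Case("i", 3) // immediate
      .Default(0);  // unknown
}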
Class to represent struct types.
Information about stack frame layout on the target.
unsigned getStackAlignment() const
getStackAlignment - This method returns the number of bytes to which the stack pointer must be aligne...
TargetInstrInfo - Interface to description of machine instruction set.
Provides information about what library functions are available for the current target.
void setBooleanVectorContents(BooleanContent Ty)
Specify how the target extends the result of a vector boolean value from a vector of i1 to a wider ty...
void setOperationAction(unsigned Op, MVT VT, LegalizeAction Action)
Indicate that the specified operation does not work with the specified type and indicate what to do a...
void setTargetDAGCombine(ISD::NodeType NT)
Targets should invoke this method for each target independent node that they want to provide a custom...
bool PredictableSelectIsExpensive
Tells the code generator that select is more expensive than a branch if the branch is usually predict...
EVT getValueType(const DataLayout &DL, Type *Ty, bool AllowUnknown=false) const
Return the EVT corresponding to this LLVM type.
virtual bool shouldExpandBuildVectorWithShuffles(EVT, unsigned DefinedValues) const
unsigned MaxStoresPerMemcpyOptSize
Likewise for functions with the OptSize attribute.
MachineBasicBlock * emitPatchPoint(MachineInstr &MI, MachineBasicBlock *MBB) const
Replace/modify any TargetFrameIndex operands with a target-dependent sequence of memory operands that...
virtual const TargetRegisterClass * getRegClassFor(MVT VT, bool isDivergent=false) const
Return the register class that should be used for the specified value type.
void setIndexedStoreAction(unsigned IdxMode, MVT VT, LegalizeAction Action)
Indicate that the specified indexed store does or does not work with the specified type and indicate ...
void setMinStackArgumentAlignment(Align Alignment)
Set the minimum stack alignment of an argument.
virtual MVT getVectorIdxTy(const DataLayout &DL) const
Returns the type to be used for the index operand of: ISD::INSERT_VECTOR_ELT, ISD::EXTRACT_VECTOR_ELT...
const TargetMachine & getTargetMachine() const
unsigned MaxLoadsPerMemcmp
Specify maximum number of load instructions per memcmp call.
virtual bool isZExtFree(Type *FromTy, Type *ToTy) const
Return true if any actual instruction that defines a value of type FromTy implicitly zero-extends the...
void setPrefLoopAlignment(Align Alignment)
Set the target's preferred loop alignment.
void setMaxAtomicSizeInBitsSupported(unsigned SizeInBits)
Set the maximum atomic operation size supported by the backend.
virtual Align getPrefLoopAlignment(MachineLoop *ML=nullptr) const
Return the preferred loop alignment.
Sched::Preference getSchedulingPreference() const
Return target scheduling preference.
void setMinFunctionAlignment(Align Alignment)
Set the target's minimum function alignment.
bool isOperationCustom(unsigned Op, EVT VT) const
Return true if the operation uses custom lowering, regardless of whether the type is legal or not.
void setCondCodeAction(ISD::CondCode CC, MVT VT, LegalizeAction Action)
Indicate that the specified condition code is or isn't supported on the target and indicate what to d...
unsigned MaxStoresPerMemsetOptSize
Likewise for functions with the OptSize attribute.
bool hasBigEndianPartOrdering(EVT VT, const DataLayout &DL) const
When splitting a value of the specified type into parts, does the Lo or Hi part come first?...
void setBooleanContents(BooleanContent Ty)
Specify how the target extends the result of integer and floating point boolean values from i1 to a w...
unsigned MaxStoresPerMemmove
Specify maximum number of store instructions per memmove call.
void computeRegisterProperties(const TargetRegisterInfo *TRI)
Once all of the register classes are added, this allows us to compute derived properties we expose.
EVT getShiftAmountTy(EVT LHSTy, const DataLayout &DL, bool LegalTypes=true) const
unsigned MaxStoresPerMemmoveOptSize
Likewise for functions with the OptSize attribute.
void addRegisterClass(MVT VT, const TargetRegisterClass *RC)
Add the specified register class as an available regclass for the specified value type.
bool isTypeLegal(EVT VT) const
Return true if the target has native support for the specified value type.
virtual bool isJumpTableRelative() const
virtual MVT getPointerTy(const DataLayout &DL, uint32_t AS=0) const
Return the pointer type for the given address space, defaults to the pointer type from the data layou...
void setLibcallName(RTLIB::Libcall Call, const char *Name)
Rename the default libcall routine name for the specified libcall.
void setPrefFunctionAlignment(Align Alignment)
Set the target's preferred function alignment.
bool isOperationLegal(unsigned Op, EVT VT) const
Return true if the specified operation is legal on this target.
unsigned MaxStoresPerMemset
Specify maximum number of store instructions per memset call.
void setTruncStoreAction(MVT ValVT, MVT MemVT, LegalizeAction Action)
Indicate that the specified truncating store does not work with the specified type and indicate what ...
bool isOperationLegalOrCustom(unsigned Op, EVT VT, bool LegalOnly=false) const
Return true if the specified operation is legal on this target or can be made legal with custom lower...
unsigned MaxLoadsPerMemcmpOptSize
Likewise for functions with the OptSize attribute.
void setStackPointerRegisterToSaveRestore(Register R)
If set to a physical register, this specifies the register that llvm.savestack/llvm....
void AddPromotedToType(unsigned Opc, MVT OrigVT, MVT DestVT)
If Opc/OrigVT is specified as being promoted, the promotion code defaults to trying a larger integer/...
void setLoadExtAction(unsigned ExtType, MVT ValVT, MVT MemVT, LegalizeAction Action)
Indicate that the specified load with extension does not work with the specified type and indicate wh...
NegatibleCost
Enum that specifies when a float negation is beneficial.
std::vector< ArgListEntry > ArgListTy
void setHasMultipleConditionRegisters(bool hasManyRegs=true)
Tells the code generator that the target has multiple (allocatable) condition registers that can be u...
unsigned MaxStoresPerMemcpy
Specify maximum number of store instructions per memcpy call.
void setSchedulingPreference(Sched::Preference Pref)
Specify the target scheduling preference.
virtual void insertSSPDeclarations(Module &M) const
Inserts necessary declarations for SSP (stack protection) purpose.
void setJumpIsExpensive(bool isExpensive=true)
Tells the code generator not to expand logic operations on comparison predicates into separate sequen...
void setIndexedLoadAction(unsigned IdxMode, MVT VT, LegalizeAction Action)
Indicate that the specified indexed load does or does not work with the specified type and indicate w...
virtual MCSymbol * getFunctionEntryPointSymbol(const GlobalValue *Func, const TargetMachine &TM) const
If supported, return the function entry point symbol.
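A hedged sketch of how a target constructor typically strings these hooks together; MyTargetLowering, MySubtarget and GPRRegClass are hypothetical names, the opcode/type choices are illustrative rather than PPC's actual configuration, and `using namespace llvm` is assumed.

MyTargetLowering::MyTargetLowering(const TargetMachine &TM,
                                   const MySubtarget &STI)
    : TargetLowering(TM) {
  // Register classes must be added before register properties are computed.
  addRegisterClass(MVT::i32, &MyTarget::GPRRegClass); // hypothetical class
  setOperationAction(ISD::SDIVREM, MVT::i32, Expand);   // no divrem instr
  setOperationAction(ISD::SELECT_CC, MVT::i32, Custom); // lowered by hand
  setLoadExtAction(ISD::SEXTLOAD, MVT::i32, MVT::i1, Promote);
  setTruncStoreAction(MVT::f64, MVT::f32, Expand);
  setSchedulingPreference(Sched::ILP);
  computeRegisterProperties(STI.getRegisterInfo()); // derive the rest
}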
This class defines information used to lower LLVM code to legal SelectionDAG operators that the targe...
virtual const MCExpr * getPICJumpTableRelocBaseExpr(const MachineFunction *MF, unsigned JTI, MCContext &Ctx) const
This returns the relocation base for the given PIC jumptable, the same as getPICJumpTableRelocBase,...
SDValue lowerCmpEqZeroToCtlzSrl(SDValue Op, SelectionDAG &DAG) const
virtual bool useLoadStackGuardNode() const
If this function returns true, SelectionDAGBuilder emits a LOAD_STACK_GUARD node when it is lowering ...
SDValue getCheaperNegatedExpression(SDValue Op, SelectionDAG &DAG, bool LegalOps, bool OptForSize, unsigned Depth=0) const
This is the helper function to return the newly negated expression only when the cost is cheaper.
virtual void LowerAsmOperandForConstraint(SDValue Op, std::string &Constraint, std::vector< SDValue > &Ops, SelectionDAG &DAG) const
Lower the specified operand into the Ops vector.
virtual ConstraintType getConstraintType(StringRef Constraint) const
Given a constraint, return the type of constraint it is for this target.
virtual SDValue LowerToTLSEmulatedModel(const GlobalAddressSDNode *GA, SelectionDAG &DAG) const
Lower TLS global address SDNode for target independent emulated TLS model.
std::pair< SDValue, SDValue > LowerCallTo(CallLoweringInfo &CLI) const
This function lowers an abstract call to a function into an actual call.
bool isPositionIndependent() const
virtual SDValue getNegatedExpression(SDValue Op, SelectionDAG &DAG, bool LegalOps, bool OptForSize, NegatibleCost &Cost, unsigned Depth=0) const
Return the newly negated expression if the cost is not expensive and set the cost in Cost to indicate...
virtual ConstraintWeight getSingleConstraintMatchWeight(AsmOperandInfo &info, const char *constraint) const
Examine constraint string and operand type and determine a weight value.
virtual SDValue getSqrtInputTest(SDValue Operand, SelectionDAG &DAG, const DenormalMode &Mode) const
Return a target-dependent comparison result if the input operand is suitable for use with a square ro...
virtual SDValue getPICJumpTableRelocBase(SDValue Table, SelectionDAG &DAG) const
Returns relocation base for the given PIC jumptable.
virtual std::pair< unsigned, const TargetRegisterClass * > getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI, StringRef Constraint, MVT VT) const
Given a physical register constraint (e.g.
bool verifyReturnAddressArgumentIsConstant(SDValue Op, SelectionDAG &DAG) const
virtual SDValue getSqrtResultForDenormInput(SDValue Operand, SelectionDAG &DAG) const
Return a target-dependent result if the input operand is not suitable for use with a square root esti...
virtual bool isGAPlusOffset(SDNode *N, const GlobalValue *&GA, int64_t &Offset) const
Returns true (and the GlobalValue and the offset) if the node is a GlobalAddress + offset.
virtual unsigned getJumpTableEncoding() const
Return the entry encoding for a jump table in the current function.
Primary interface to the complete machine description for the target machine.
bool useEmulatedTLS() const
Returns true if this target uses emulated TLS.
Reloc::Model getRelocationModel() const
Returns the code generation relocation model.
CodeModel::Model getCodeModel() const
Returns the code model.
TargetOptions Options
bool shouldAssumeDSOLocal(const Module &M, const GlobalValue *GV) const
unsigned UnsafeFPMath
UnsafeFPMath - This flag is enabled when the -enable-unsafe-fp-math flag is specified on the command ...
unsigned NoInfsFPMath
NoInfsFPMath - This flag is enabled when the -enable-no-infs-fp-math flag is specified on the command...
unsigned NoSignedZerosFPMath
NoSignedZerosFPMath - This flag is enabled when the -enable-no-signed-zeros-fp-math is specified on t...
unsigned NoNaNsFPMath
NoNaNsFPMath - This flag is enabled when the -enable-no-nans-fp-math flag is specified on the command...
unsigned GuaranteedTailCallOpt
GuaranteedTailCallOpt - This flag is enabled when -tailcallopt is specified on the commandline.
FPOpFusion::FPOpFusionMode AllowFPOpFusion
AllowFPOpFusion - This flag is set by the -fuse-fp-ops=xxx option.
TargetRegisterInfo base class - We assume that the target defines a static array of TargetRegisterDes...
Target - Wrapper for Target specific information.
Twine - A lightweight data structure for efficiently representing the concatenation of temporary valu...
Definition Twine.h:80
static TypeSize Fixed(ScalarTy MinVal)
Definition TypeSize.h:418
The instances of the Type class are immutable: once they are created, they are never changed.
Definition Type.h:46
@ FloatTyID
32-bit floating point type
Definition Type.h:59
@ DoubleTyID
64-bit floating point type
Definition Type.h:60
@ FP128TyID
128-bit floating point type (112-bit significand)
Definition Type.h:62
static Type * getVoidTy(LLVMContext &C)
Definition Type.cpp:180
TypeSize getPrimitiveSizeInBits() const LLVM_READONLY
Return the basic size of this type if it is a primitive type.
Definition Type.cpp:122
bool isIntegerTy() const
True if this is an instance of IntegerType.
Definition Type.h:202
Value * getOperand(unsigned i) const
Definition User.h:169
unsigned getNumOperands() const
Definition User.h:191
LLVM Value Representation.
Definition Value.h:75
Type * getType() const
All values are typed, get the type of this value.
Definition Value.h:246
User * user_back()
Definition Value.h:410
self_iterator getIterator()
Definition ilist_node.h:81
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
constexpr char Args[]
Key for Kernel::Metadata::mArgs.
std::underlying_type_t< E > Mask()
Get a bitmask with 1s in all places up to the high-order bit of E's largest value.
Definition BitmaskEnum.h:80
@ Fast
Fast - This calling convention attempts to make calls as fast as possible (e.g.
Definition CallingConv.h:42
@ C
C - The default llvm calling convention, compatible with C.
Definition CallingConv.h:34
bool isNON_EXTLoad(const SDNode *N)
Returns true if the specified node is a non-extending load.
NodeType
ISD::NodeType enum - This enum defines the target-independent operators for a SelectionDAG.
Definition ISDOpcodes.h:40
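A sketch of the dispatch shape a PerformDAGCombine-style hook uses over these opcodes; the two folds are deliberately trivial placeholders, not PPC's real combines.

#include "llvm/CodeGen/SelectionDAG.h"

// Sketch (assumed helper): switch on target-independent opcodes.
static llvm::SDValue combineNode(llvm::SDNode *N) {
  switch (N->getOpcode()) {
  case llvm::ISD::ADD: // fold (add x, 0) -> x
    if (llvm::isNullConstant(N->getOperand(1)))
      return N->getOperand(0);
    break;
  case llvm::ISD::AND: // fold (and x, -1) -> x
    if (llvm::isAllOnesConstant(N->getOperand(1)))
      return N->getOperand(0);
    break;
  default:
    break;
  }
  return llvm::SDValue(); // no change; caller keeps the original node
}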
@ SETCC
SetCC operator - This evaluates to a true value iff the condition is true.
Definition ISDOpcodes.h:651
@ MERGE_VALUES
MERGE_VALUES - This node takes multiple discrete operands and returns them all as its individual resu...
Definition ISDOpcodes.h:229
@ STACKRESTORE
STACKRESTORE has two operands, an input chain and a pointer to restore to it returns an output chain.
Definition ISDOpcodes.h:954
@ STACKSAVE
STACKSAVE - STACKSAVE has one operand, an input chain.
Definition ISDOpcodes.h:950
@ TargetConstantPool
Definition ISDOpcodes.h:161
@ STRICT_FSETCC
STRICT_FSETCC/STRICT_FSETCCS - Constrained versions of SETCC, used for floating-point operands only.
Definition ISDOpcodes.h:456
@ DELETED_NODE
DELETED_NODE - This is an illegal value that is used to catch errors.
Definition ISDOpcodes.h:44
@ FLT_ROUNDS_
FLT_ROUNDS_ - Returns current rounding mode: -1 Undefined 0 Round to 0 1 Round to nearest 2 Round to ...
Definition ISDOpcodes.h:772
@ EH_SJLJ_LONGJMP
OUTCHAIN = EH_SJLJ_LONGJMP(INCHAIN, buffer) This corresponds to the eh.sjlj.longjmp intrinsic.
Definition ISDOpcodes.h:140
@ SMUL_LOHI
SMUL_LOHI/UMUL_LOHI - Multiply two integers of type iN, producing a signed/unsigned value of type i[2...
Definition ISDOpcodes.h:243
@ BSWAP
Byte Swap and Counting operators.
Definition ISDOpcodes.h:615
@ VAEND
VAEND, VASTART - VAEND and VASTART have three operands: an input chain, pointer, and a SRCVALUE.
Definition ISDOpcodes.h:983
@ ATOMIC_STORE
OUTCHAIN = ATOMIC_STORE(INCHAIN, ptr, val) This corresponds to "store atomic" instruction.
@ ADDC
Carry-setting nodes for multiple precision addition and subtraction.
Definition ISDOpcodes.h:262
@ ADD
Simple integer binary arithmetic operators.
Definition ISDOpcodes.h:232
@ LOAD
LOAD and STORE have token chains as their first operand, then the same operands as an LLVM load/store...
Definition ISDOpcodes.h:863
@ ANY_EXTEND
ANY_EXTEND - Used for integer types. The high bits are undefined.
Definition ISDOpcodes.h:681
@ FMA
FMA - Perform a * b + c with no intermediate rounding step.
Definition ISDOpcodes.h:460
@ INTRINSIC_VOID
OUTCHAIN = INTRINSIC_VOID(INCHAIN, INTRINSICID, arg1, arg2, ...) This node represents a target intrin...
Definition ISDOpcodes.h:192
@ GlobalAddress
Definition ISDOpcodes.h:71
@ SINT_TO_FP
[SU]INT_TO_FP - These operators convert integers (whose interpreted sign depends on the first letter)...
Definition ISDOpcodes.h:688
@ CONCAT_VECTORS
CONCAT_VECTORS(VECTOR0, VECTOR1, ...) - Given a number of values of vector type with the same length ...
Definition ISDOpcodes.h:513
@ FADD
Simple binary floating point operators.
Definition ISDOpcodes.h:371
@ ABS
ABS - Determine the unsigned absolute value of a signed integer value of the same bitwidth.
Definition ISDOpcodes.h:589
@ SDIVREM
SDIVREM/UDIVREM - Divide two integers and produce both a quotient and remainder result.
Definition ISDOpcodes.h:248
@ FP16_TO_FP
FP16_TO_FP, FP_TO_FP16 - These operators are used to perform promotions and truncation for half-preci...
Definition ISDOpcodes.h:800
@ BITCAST
BITCAST - This operator converts between integer, vector and FP values, as if the value was stored to...
Definition ISDOpcodes.h:790
@ BUILD_PAIR
BUILD_PAIR - This is the opposite of EXTRACT_ELEMENT in some ways.
Definition ISDOpcodes.h:222
@ INIT_TRAMPOLINE
INIT_TRAMPOLINE - This corresponds to the init_trampoline intrinsic.
@ STRICT_FSQRT
Constrained versions of libm-equivalent floating point intrinsics.
Definition ISDOpcodes.h:392
@ GlobalTLSAddress
Definition ISDOpcodes.h:72
@ SIGN_EXTEND
Conversion operators.
Definition ISDOpcodes.h:675
@ STRICT_UINT_TO_FP
Definition ISDOpcodes.h:430
@ SCALAR_TO_VECTOR
SCALAR_TO_VECTOR(VAL) - This represents the operation of loading a scalar value into element 0 of the...
Definition ISDOpcodes.h:558
@ TargetExternalSymbol
Definition ISDOpcodes.h:162
@ BR
Control flow instructions. These all have token chains.
Definition ISDOpcodes.h:879
@ TargetJumpTable
Definition ISDOpcodes.h:160
@ PREFETCH
PREFETCH - This corresponds to a prefetch intrinsic.
@ FSINCOS
FSINCOS - Compute both fsin and fcos as a single operation.
Definition ISDOpcodes.h:857
@ FNEG
Perform various unary floating-point operations inspired by libm.
Definition ISDOpcodes.h:808
@ BR_CC
BR_CC - Conditional branch.
Definition ISDOpcodes.h:905
@ BR_JT
BR_JT - Jumptable branch.
Definition ISDOpcodes.h:888
@ SSUBSAT
RESULT = [US]SUBSAT(LHS, RHS) - Perform saturation subtraction on 2 integers with the same bit width ...
Definition ISDOpcodes.h:329
@ SELECT
Select(COND, TRUEVAL, FALSEVAL).
Definition ISDOpcodes.h:628
@ ATOMIC_LOAD
Val, OUTCHAIN = ATOMIC_LOAD(INCHAIN, ptr) This corresponds to "load atomic" instruction.
@ EXTRACT_ELEMENT
EXTRACT_ELEMENT - This is used to get the lower or upper (determined by a Constant,...
Definition ISDOpcodes.h:215
@ VACOPY
VACOPY - VACOPY has 5 operands: an input chain, a destination pointer, a source pointer,...
Definition ISDOpcodes.h:979
@ TargetGlobalAddress
TargetGlobalAddress - Like GlobalAddress, but the DAG does no folding or anything else with this node...
Definition ISDOpcodes.h:157
@ MULHU
MULHU/MULHS - Multiply high - Multiply two integers of type iN, producing an unsigned/signed value of...
Definition ISDOpcodes.h:570
@ SHL
Shift and rotation operations.
Definition ISDOpcodes.h:606
@ VECTOR_SHUFFLE
VECTOR_SHUFFLE(VEC1, VEC2) - Returns a vector, of the same type as VEC1/VEC2.
Definition ISDOpcodes.h:550
@ EXTRACT_SUBVECTOR
EXTRACT_SUBVECTOR(VECTOR, IDX) - Returns a subvector from VECTOR.
Definition ISDOpcodes.h:541
@ FMINNUM_IEEE
FMINNUM_IEEE/FMAXNUM_IEEE - Perform floating-point minimum or maximum on two values,...
Definition ISDOpcodes.h:847
@ EXTRACT_VECTOR_ELT
EXTRACT_VECTOR_ELT(VECTOR, IDX) - Returns a single element from VECTOR identified by the (potentially...
Definition ISDOpcodes.h:505
@ ZERO_EXTEND
ZERO_EXTEND - Used for integer types, zeroing the new bits.
Definition ISDOpcodes.h:678
@ SELECT_CC
Select with condition operator - This selects between a true value and a false value (ops #2 and #3) ...
Definition ISDOpcodes.h:643
@ ATOMIC_CMP_SWAP
Val, OUTCHAIN = ATOMIC_CMP_SWAP(INCHAIN, ptr, cmp, swap) For double-word atomic operations: ValLo,...
@ FMINNUM
FMINNUM/FMAXNUM - Perform floating-point minimum or maximum on two values.
Definition ISDOpcodes.h:840
@ DYNAMIC_STACKALLOC
DYNAMIC_STACKALLOC - Allocate some number of bytes on the stack aligned to a specified boundary.
Definition ISDOpcodes.h:873
@ SIGN_EXTEND_INREG
SIGN_EXTEND_INREG - This operator atomically performs a SHL/SRA pair to sign extend a small value in ...
Definition ISDOpcodes.h:696
@ SMIN
[US]{MIN/MAX} - Binary minimum or maximum of signed or unsigned integers.
Definition ISDOpcodes.h:575
@ FP_EXTEND
X = FP_EXTEND(Y) - Extend a smaller FP type into a larger FP type.
Definition ISDOpcodes.h:775
@ VSELECT
Select with a vector condition (op #0) and two vector operands (ops #1 and #2), returning a vector re...
Definition ISDOpcodes.h:637
@ STRICT_SINT_TO_FP
STRICT_[US]INT_TO_FP - Convert a signed or unsigned integer to a floating point value.
Definition ISDOpcodes.h:429
@ INLINEASM_BR
INLINEASM_BR - Branching version of inline asm. Used by asm-goto.
Definition ISDOpcodes.h:925
@ EH_DWARF_CFA
EH_DWARF_CFA - This node represents the pointer to the DWARF Canonical Frame Address (CFA),...
Definition ISDOpcodes.h:122
@ FRAMEADDR
FRAMEADDR, RETURNADDR - These nodes represent llvm.frameaddress and llvm.returnaddress on the DAG.
Definition ISDOpcodes.h:87
@ STRICT_FP_TO_UINT
Definition ISDOpcodes.h:423
@ STRICT_FP_ROUND
X = STRICT_FP_ROUND(Y, TRUNC) - Rounding 'Y' from a larger floating point type down to the precision ...
Definition ISDOpcodes.h:445
@ STRICT_FP_TO_SINT
STRICT_FP_TO_[US]INT - Convert a floating point value to a signed or unsigned integer.
Definition ISDOpcodes.h:422
@ FP_TO_SINT
FP_TO_[US]INT - Convert a floating point value to a signed or unsigned integer.
Definition ISDOpcodes.h:734
@ READCYCLECOUNTER
READCYCLECOUNTER - This corresponds to the readcyclecounter intrinsic.
@ STRICT_FP_EXTEND
X = STRICT_FP_EXTEND(Y) - Extend a smaller FP type into a larger FP type.
Definition ISDOpcodes.h:450
@ AND
Bitwise operators - logical and, logical or, logical xor.
Definition ISDOpcodes.h:581
@ TRAP
TRAP - Trapping instruction.
@ INTRINSIC_WO_CHAIN
RESULT = INTRINSIC_WO_CHAIN(INTRINSICID, arg1, arg2, ...) This node represents a target intrinsic fun...
Definition ISDOpcodes.h:177
@ ADDE
Carry-using nodes for multiple precision addition and subtraction.
Definition ISDOpcodes.h:272
@ STRICT_FADD
Constrained versions of the binary floating point operators.
Definition ISDOpcodes.h:381
@ INSERT_VECTOR_ELT
INSERT_VECTOR_ELT(VECTOR, VAL, IDX) - Returns VECTOR with the element at IDX replaced with VAL.
Definition ISDOpcodes.h:494
@ TokenFactor
TokenFactor - This node takes multiple tokens as input and produces a single token result.
Definition ISDOpcodes.h:52
@ FP_ROUND
X = FP_ROUND(Y, TRUNC) - Rounding 'Y' from a larger floating point type down to the precision of the ...
Definition ISDOpcodes.h:763
@ ZERO_EXTEND_VECTOR_INREG
ZERO_EXTEND_VECTOR_INREG(Vector) - This operator represents an in-register zero-extension of the low ...
Definition ISDOpcodes.h:729
@ INLINEASM
INLINEASM - Represents an inline asm block.
Definition ISDOpcodes.h:922
@ STRICT_FNEARBYINT
Definition ISDOpcodes.h:403
@ EH_SJLJ_SETJMP
RESULT, OUTCHAIN = EH_SJLJ_SETJMP(INCHAIN, buffer) This corresponds to the eh.sjlj....
Definition ISDOpcodes.h:134
@ TRUNCATE
TRUNCATE - Completely drop the high bits.
Definition ISDOpcodes.h:684
@ VAARG
VAARG - VAARG has four operands: an input chain, a pointer, a SRCVALUE, and the alignment.
Definition ISDOpcodes.h:974
@ BRCOND
BRCOND - Conditional branch.
Definition ISDOpcodes.h:898
@ SHL_PARTS
SHL_PARTS/SRA_PARTS/SRL_PARTS - These operators are used for expanded integer shift operations.
Definition ISDOpcodes.h:664
@ AssertSext
AssertSext, AssertZext - These nodes record if a register contains a value that has already been zero...
Definition ISDOpcodes.h:59
@ FCOPYSIGN
FCOPYSIGN(X, Y) - Return the value of X with the sign of Y.
Definition ISDOpcodes.h:470
@ SADDSAT
RESULT = [US]ADDSAT(LHS, RHS) - Perform saturation addition on 2 integers with the same bit width (W)...
Definition ISDOpcodes.h:320
@ CALLSEQ_START
CALLSEQ_START/CALLSEQ_END - These operators mark the beginning and end of a call sequence,...
Definition ISDOpcodes.h:968
@ GET_DYNAMIC_AREA_OFFSET
GET_DYNAMIC_AREA_OFFSET - get offset from native SP to the address of the most recent dynamic alloca.
@ ADJUST_TRAMPOLINE
ADJUST_TRAMPOLINE - This corresponds to the adjust_trampoline intrinsic.
@ INTRINSIC_W_CHAIN
RESULT,OUTCHAIN = INTRINSIC_W_CHAIN(INCHAIN, INTRINSICID, arg1, ...) This node represents a target in...
Definition ISDOpcodes.h:185
@ TargetGlobalTLSAddress
Definition ISDOpcodes.h:158
@ BUILD_VECTOR
BUILD_VECTOR(ELT0, ELT1, ELT2, ELT3,...) - Return a fixed-width vector with the specified,...
Definition ISDOpcodes.h:485
bool isNormalStore(const SDNode *N)
Returns true if the specified node is a non-truncating and unindexed store.
bool isEXTLoad(const SDNode *N)
Returns true if the specified node is a EXTLOAD.
bool isBuildVectorAllZeros(const SDNode *N)
Return true if the specified node is a BUILD_VECTOR where all of the elements are 0 or undef.
bool isSignedIntSetCC(CondCode Code)
Return true if this is a setcc instruction that performs a signed comparison when used with integer o...
MemIndexedMode
MemIndexedMode enum - This enum defines the load / store indexed addressing modes.
CondCode
ISD::CondCode enum - These are ordered carefully to make the bitfields below work out,...
LoadExtType
LoadExtType enum - This enum defines the three variants of LOADEXT (load with extension).
bool isUnsignedIntSetCC(CondCode Code)
Return true if this is a setcc instruction that performs an unsigned comparison when used with intege...
bool isNormalLoad(const SDNode *N)
Returns true if the specified node is a non-extending and unindexed load.
Function * getDeclaration(Module *M, ID id, ArrayRef< Type * > Tys=None)
Create or insert an LLVM Function declaration for an intrinsic, and return it.
@ Bitcast
Perform the operation on a different, but equivalently sized type.
Flag
These should be considered private to the implementation of the MCInstrDesc class.
@ VecShuffle
Definition NVPTX.h:88
@ MO_GOT_TPREL_PCREL_FLAG
MO_GOT_TPREL_PCREL_FLAG - A combination of flags; if these bits are set they should produce the reloc...
Definition PPC.h:143
@ MO_PCREL_FLAG
MO_PCREL_FLAG - If this bit is set, the symbol reference is relative to the current instruction addre...
Definition PPC.h:107
@ MO_GOT_FLAG
MO_GOT_FLAG - If this bit is set the symbol reference is to be computed via the GOT.
Definition PPC.h:112
@ MO_TPREL_HA
Definition PPC.h:153
@ MO_PLT
On a symbol operand "FOO", this indicates that the reference is actually to "FOO@plt".
Definition PPC.h:99
@ MO_TLS
Definition PPC.h:162
@ MO_TPREL_FLAG
MO_TPREL_FLAG - If this bit is set the symbol reference is relative to TLS Initial Exec model.
Definition PPC.h:124
@ MO_TPREL_LO
Definition PPC.h:152
@ MO_LO
MO_LO, MO_HA - lo16(symbol) and ha16(symbol)
Definition PPC.h:149
@ MO_GOT_TLSLD_PCREL_FLAG
MO_GOT_TLSLD_PCREL_FLAG - A combination of flags; if these bits are set they should produce the reloc...
Definition PPC.h:138
@ MO_GOT_TLSGD_PCREL_FLAG
MO_GOT_TLSGD_PCREL_FLAG - A combination of flags; if these bits are set they should produce the reloc...
Definition PPC.h:133
@ MO_HA
Definition PPC.h:150
@ MO_PIC_FLAG
MO_PIC_FLAG - If this bit is set, the symbol reference is relative to the function's picbase,...
Definition PPC.h:103
@ FCTIDUZ
Newer FCTI[D,W]UZ floating-point-to-integer conversion instructions for unsigned integers with round ...
@ ADDI_TLSGD_L_ADDR
G8RC = ADDI_TLSGD_L_ADDR G8RReg, Symbol, Symbol - Op that combines ADDI_TLSGD_L and GET_TLS_ADDR unti...
@ FSQRT
Square root instruction.
@ STRICT_FCFID
Constrained integer-to-floating-point conversion instructions.
@ DYNALLOC
The following two target-specific nodes are used for calls through function pointers in the 64-bit SV...
@ COND_BRANCH
CHAIN = COND_BRANCH CHAIN, CRRC, OPC, DESTBB [, INFLAG] - This corresponds to the COND_BRANCH pseudo ...
@ VABSD
An SDNode for Power9 vector absolute value difference.
@ STORE_VEC_BE
CHAIN = STORE_VEC_BE CHAIN, VSRC, Ptr - Occurs only for little endian.
@ BDNZ
CHAIN = BDNZ CHAIN, DESTBB - These are used to create counter-based loops.
@ MTVSRZ
Direct move from a GPR to a VSX register (zero)
@ SRL
These nodes represent PPC shifts.
@ VECINSERT
VECINSERT - The PPC vector insert instruction.
@ LXSIZX
GPRC, CHAIN = LXSIZX, CHAIN, Ptr, ByteWidth - This is a load of an integer smaller than 64 bits into ...
@ FNMSUB
FNMSUB - Negated multiply-subtract instruction.
@ RFEBB
CHAIN = RFEBB CHAIN, State - Return from event-based branch.
@ FCTIDZ
FCTI[D,W]Z - The FCTIDZ and FCTIWZ instructions, taking an f32 or f64 operand, producing an f64 value...
@ SC
CHAIN = SC CHAIN, Imm128 - System call.
@ GET_TLS_ADDR
x3 = GET_TLS_ADDR x3, Symbol - For the general-dynamic TLS model, produces a call to __tls_get_addr(s...
@ FP_TO_UINT_IN_VSR
Floating-point-to-integer conversion instructions.
@ XXSPLTI32DX
XXSPLTI32DX - The PPC XXSPLTI32DX instruction.
@ ANDI_rec_1_EQ_BIT
i1 = ANDI_rec_1_[EQ|GT]_BIT(i32 or i64 x) - Represents the result of the eq or gt bit of CR0 after ex...
@ FRE
Reciprocal estimate instructions (unary FP ops).
@ ADDIS_GOT_TPREL_HA
G8RC = ADDIS_GOT_TPREL_HA x2, Symbol - Used by the initial-exec TLS model, produces an ADDIS8 instruc...
@ CLRBHRB
CHAIN = CLRBHRB CHAIN - Clear branch history rolling buffer.
@ SINT_VEC_TO_FP
Extract a subvector from signed integer vector and convert to FP.
@ EXTRACT_SPE
Extract SPE register component, second argument is high or low.
@ XXSWAPD
VSRC, CHAIN = XXSWAPD CHAIN, VSRC - Occurs only for little endian.
@ ADDI_TLSLD_L_ADDR
G8RC = ADDI_TLSLD_L_ADDR G8RReg, Symbol, Symbol - Op that combines ADDI_TLSLD_L and GET_TLSLD_ADDR un...
@ ATOMIC_CMP_SWAP_8
ATOMIC_CMP_SWAP - the exact same as the target-independent nodes except they ensure that the compare ...
@ ST_VSR_SCAL_INT
Store scalar integers from VSR.
@ VCMP
RESVEC = VCMP(LHS, RHS, OPC) - Represents one of the altivec VCMP* instructions.
@ BCTRL
CHAIN,FLAG = BCTRL(CHAIN, INFLAG) - Directly corresponds to a BCTRL instruction.
@ BUILD_SPE64
BUILD_SPE64 and EXTRACT_SPE are analogous to BUILD_PAIR and EXTRACT_ELEMENT but take f64 arguments in...
@ LFIWZX
GPRC, CHAIN = LFIWZX CHAIN, Ptr - This is a floating-point load which zero-extends from a 32-bit inte...
@ SCALAR_TO_VECTOR_PERMUTED
PowerPC instructions that have SCALAR_TO_VECTOR semantics tend to place the value into the least sign...
@ EXTRACT_VSX_REG
EXTRACT_VSX_REG = Extract one of the underlying vsx registers of an accumulator or pair register.
@ STXSIX
STXSIX - The STXSI[bh]X instruction.
@ MAT_PCREL_ADDR
MAT_PCREL_ADDR = Materialize a PC Relative address.
@ MFOCRF
R32 = MFOCRF(CRREG, INFLAG) - Represents the MFOCRF instruction.
@ XXSPLT
XXSPLT - The PPC VSX splat instructions.
@ TOC_ENTRY
GPRC = TOC_ENTRY GA, TOC Loads the entry for GA from the TOC, where the TOC base is given by the last...
@ XXPERMDI
XXPERMDI - The PPC XXPERMDI instruction.
@ ADDIS_DTPREL_HA
G8RC = ADDIS_DTPREL_HA x3, Symbol - For the local-dynamic TLS model, produces an ADDIS8 instruction t...
@ ADD_TLS
G8RC = ADD_TLS G8RReg, Symbol - Used by the initial-exec TLS model, produces an ADD instruction that ...
@ MTVSRA
Direct move from a GPR to a VSX register (algebraic)
@ VADD_SPLAT
VRRC = VADD_SPLAT Elt, EltSize - Temporary node to be expanded during instruction selection to optimi...
@ PPC32_GOT
GPRC = address of GLOBAL_OFFSET_TABLE.
@ ADDI_DTPREL_L
G8RC = ADDI_DTPREL_L G8RReg, Symbol - For the local-dynamic TLS model, produces an ADDI8 instruction ...
@ BCTRL_LOAD_TOC
CHAIN,FLAG = BCTRL(CHAIN, ADDR, INFLAG) - The combination of a bctrl instruction and the TOC reload r...
@ PPC32_PICGOT
GPRC = address of GLOBAL_OFFSET_TABLE.
@ FCFID
FCFID - The FCFID instruction, taking an f64 operand and producing an f64 value containing the FP re...
@ CR6SET
ch, gl = CR6[UN]SET ch, inglue - Toggle CR bit 6 for SVR4 vararg calls
@ LBRX
GPRC, CHAIN = LBRX CHAIN, Ptr, Type - This is a byte-swapping load instruction.
@ LD_VSX_LH
VSRC, CHAIN = LD_VSX_LH CHAIN, Ptr - This is a floating-point load of a v2f32 value into the lower ha...
@ PROBED_ALLOCA
To avoid stack clash, allocation is performed by block and each block is probed.
@ XXMFACC
XXMFACC = This corresponds to the xxmfacc instruction.
@ ADDIS_TLSGD_HA
G8RC = ADDIS_TLSGD_HA x2, Symbol - For the general-dynamic TLS model, produces an ADDIS8 instruction ...
@ ACC_BUILD
ACC_BUILD = Build an accumulator register from 4 VSX registers.
@ GlobalBaseReg
The result of the mflr at function entry, used for PIC code.
@ LXVD2X
VSRC, CHAIN = LXVD2X_LE CHAIN, Ptr - Occurs only for little endian.
@ CALL
CALL - A direct function call.
@ MTCTR
CHAIN,FLAG = MTCTR(VAL, CHAIN[, INFLAG]) - Directly corresponds to a MTCTR instruction.
@ TC_RETURN
TC_RETURN - A tail call return.
@ STFIWX
STFIWX - The STFIWX instruction.
@ LD_SPLAT
VSRC, CHAIN = LD_SPLAT, CHAIN, Ptr - a splatting load memory instructions such as LXVDSX,...
@ VCMP_rec
RESVEC, OUTFLAG = VCMP_rec(LHS, RHS, OPC) - Represents one of the altivec VCMP*_rec instructions.
@ MFFS
F8RC = MFFS - This moves the FPSCR (not modeled) into the register.
@ PADDI_DTPREL
G8RC = PADDI_DTPREL x3, Symbol - For the pc-rel based local-dynamic TLS model, produces a PADDI8 inst...
@ BUILD_FP128
Direct move of 2 consecutive GPR to a VSX register.
@ VEXTS
VEXTS, ByteWidth - takes an input in VSFRC and produces an output in VSFRC that is sign-extended from...
@ TLS_LOCAL_EXEC_MAT_ADDR
TLS_LOCAL_EXEC_MAT_ADDR = Materialize an address for TLS global address when using local exec access ...
@ VPERM
VPERM - The PPC VPERM Instruction.
@ ADDIS_TLSLD_HA
G8RC = ADDIS_TLSLD_HA x2, Symbol - For the local-dynamic TLS model, produces an ADDIS8 instruction th...
@ XXSPLTI_SP_TO_DP
XXSPLTI_SP_TO_DP - The PPC VSX splat instructions for immediates for converting immediate single prec...
@ GET_TLSLD_ADDR
x3 = GET_TLSLD_ADDR x3, Symbol - For the local-dynamic TLS model, produces a call to __tls_get_addr(s...
@ ADDI_TLSGD_L
x3 = ADDI_TLSGD_L G8RReg, Symbol - For the general-dynamic TLS model, produces an ADDI8 instruction t...
@ DYNAREAOFFSET
This instruction is lowered in PPCRegisterInfo::eliminateFrameIndex to compute an offset from native ...
@ PAIR_BUILD
PAIR_BUILD = Build a vector pair register from 2 VSX registers.
@ STRICT_FADDRTZ
Constrained floating point add in round-to-zero mode.
@ FTSQRT
Test instruction for software square root.
@ FP_EXTEND_HALF
FP_EXTEND_HALF(VECTOR, IDX) - Custom extend upper (IDX=0) half or lower (IDX=1) half of v4f32 to v2f6...
@ RET_FLAG
Return with a flag operand, matched by 'blr'.
@ CMPB
The CMPB instruction (takes two operands of i32 or i64).
@ VECSHL
VECSHL - The PPC vector shift left instruction.
@ ADDI_TLSLD_L
x3 = ADDI_TLSLD_L G8RReg, Symbol - For the local-dynamic TLS model, produces an ADDI8 instruction tha...
@ FADDRTZ
F8RC = FADDRTZ F8RC, F8RC - This is an FADD done with rounding towards zero.
@ XSMAXCDP
XSMAXCDP, XSMINCDP - C-type min/max instructions.
@ SRA_ADDZE
The combination of sra[wd]i and addze used to implemented signed integer division by a power of 2.
@ EXTSWSLI
EXTSWSLI = The PPC extswsli instruction, which does an extend-sign word and shift left immediate.
@ STXVD2X
CHAIN = STXVD2X CHAIN, VSRC, Ptr - Occurs only for little endian.
@ UINT_VEC_TO_FP
Extract a subvector from unsigned integer vector and convert to FP.
@ LXVRZX
LXVRZX - Load VSX Vector Rightmost and Zero Extend This node represents v1i128 BUILD_VECTOR of a zero...
@ MFBHRBE
GPRC, CHAIN = MFBHRBE CHAIN, Entry, Dummy - Move from branch history rolling buffer entry.
@ FCFIDU
Newer FCFID[US] integer-to-floating-point conversion instructions for unsigned integers and single-pr...
@ FSEL
FSEL - Traditional three-operand fsel node.
@ SWAP_NO_CHAIN
An SDNode for swaps that are not associated with any loads/stores and thereby have no chain.
@ LOAD_VEC_BE
VSRC, CHAIN = LOAD_VEC_BE CHAIN, Ptr - Occurs only for little endian.
@ LFIWAX
GPRC, CHAIN = LFIWAX CHAIN, Ptr - This is a floating-point load which sign-extends from a 32-bit inte...
@ STBRX
CHAIN = STBRX CHAIN, GPRC, Ptr, Type - This is a byte-swapping store instruction.
@ LD_GOT_TPREL_L
G8RC = LD_GOT_TPREL_L Symbol, G8RReg - Used by the initial-exec TLS model, produces a LD instruction ...
@ MFVSR
Direct move from a VSX register to a GPR.
@ TLS_DYNAMIC_MAT_PCREL_ADDR
TLS_DYNAMIC_MAT_PCREL_ADDR = Materialize a PC Relative address for TLS global address when using dyna...
@ Hi
Hi/Lo - These represent the high and low 16-bit parts of a global address respectively.
Predicate
Predicate - These are "(BI << 5) | BO" for various predicates.
SDValue get_VSPLTI_elt(SDNode *N, unsigned ByteSize, SelectionDAG &DAG)
get_VSPLTI_elt - If this is a build_vector of constants which can be formed by using a vspltis[bhw] i...
bool isXXBRDShuffleMask(ShuffleVectorSDNode *N)
isXXBRDShuffleMask - Return true if this is a shuffle mask suitable for a XXBRD instruction.
FastISel * createFastISel(FunctionLoweringInfo &FuncInfo, const TargetLibraryInfo *LibInfo)
bool isVMRGHShuffleMask(ShuffleVectorSDNode *N, unsigned UnitSize, unsigned ShuffleKind, SelectionDAG &DAG)
isVMRGHShuffleMask - Return true if this is a shuffle mask suitable for a VMRGH* instruction with the ...
bool isVPKUDUMShuffleMask(ShuffleVectorSDNode *N, unsigned ShuffleKind, SelectionDAG &DAG)
isVPKUDUMShuffleMask - Return true if this is the shuffle mask for a VPKUDUM instruction.
bool isVMRGEOShuffleMask(ShuffleVectorSDNode *N, bool CheckEven, unsigned ShuffleKind, SelectionDAG &DAG)
isVMRGEOShuffleMask - Return true if this is a shuffle mask suitable for a VMRGEW or VMRGOW instructi...
bool isXXBRQShuffleMask(ShuffleVectorSDNode *N)
isXXBRQShuffleMask - Return true if this is a shuffle mask suitable for a XXBRQ instruction.
bool isXXBRWShuffleMask(ShuffleVectorSDNode *N)
isXXBRWShuffleMask - Return true if this is a shuffle mask suitable for a XXBRW instruction.
bool isXXPERMDIShuffleMask(ShuffleVectorSDNode *N, unsigned &ShiftElts, bool &Swap, bool IsLE)
isXXPERMDIShuffleMask - Return true if this is a shuffle mask suitable for a XXPERMDI instruction.
bool isXXBRHShuffleMask(ShuffleVectorSDNode *N)
isXXBRHShuffleMask - Return true if this is a shuffle mask suitable for a XXBRH instruction.
unsigned getSplatIdxForPPCMnemonics(SDNode *N, unsigned EltSize, SelectionDAG &DAG)
getSplatIdxForPPCMnemonics - Return the splat index as a value that is appropriate for PPC mnemonics ...
bool isXXSLDWIShuffleMask(ShuffleVectorSDNode *N, unsigned &ShiftElts, bool &Swap, bool IsLE)
isXXSLDWIShuffleMask - Return true if this is a shuffle mask suitable for a XXSLDWI instruction.
int isVSLDOIShuffleMask(SDNode *N, unsigned ShuffleKind, SelectionDAG &DAG)
isVSLDOIShuffleMask - If this is a vsldoi shuffle mask, return the shift amount, otherwise return -1.
bool isVMRGLShuffleMask(ShuffleVectorSDNode *N, unsigned UnitSize, unsigned ShuffleKind, SelectionDAG &DAG)
isVMRGLShuffleMask - Return true if this is a shuffle mask suitable for a VMRGL* instruction with the ...
bool isXXINSERTWMask(ShuffleVectorSDNode *N, unsigned &ShiftElts, unsigned &InsertAtByte, bool &Swap, bool IsLE)
isXXINSERTWMask - Return true if this VECTOR_SHUFFLE can be handled by the XXINSERTW instruction intr...
bool isSplatShuffleMask(ShuffleVectorSDNode *N, unsigned EltSize)
isSplatShuffleMask - Return true if the specified VECTOR_SHUFFLE operand specifies a splat of a singl...
bool isVPKUWUMShuffleMask(ShuffleVectorSDNode *N, unsigned ShuffleKind, SelectionDAG &DAG)
isVPKUWUMShuffleMask - Return true if this is the shuffle mask for a VPKUWUM instruction.
bool isVPKUHUMShuffleMask(ShuffleVectorSDNode *N, unsigned ShuffleKind, SelectionDAG &DAG)
isVPKUHUMShuffleMask - Return true if this is the shuffle mask for a VPKUHUM instruction.
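A sketch of how these predicates are typically consulted while lowering a VECTOR_SHUFFLE; the early-return structure is illustrative and elides the actual instruction emission.

#include "PPCISelLowering.h"
#include "llvm/CodeGen/SelectionDAG.h"

// Sketch (assumed helper): probe for a 4-byte splat before other patterns.
static llvm::SDValue trySplatLowering(llvm::ShuffleVectorSDNode *SVN,
                                      llvm::SelectionDAG &DAG) {
  if (llvm::PPC::isSplatShuffleMask(SVN, /*EltSize=*/4)) {
    unsigned Idx = llvm::PPC::getSplatIdxForPPCMnemonics(SVN, 4, DAG);
    (void)Idx; // would become the splat instruction's immediate operand
  }
  return llvm::SDValue(); // fall through to the generic path
}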
@ XMC_PR
Program Code.
Definition XCOFF.h:40
@ XTY_ER
External reference.
Definition XCOFF.h:176
This class represents lattice values for constants.
LLVM_NODISCARD std::enable_if_t< !is_simple_type< Y >::value, typename cast_retty< X, const Y >::ret_type > dyn_cast_or_null(const Y &Val)
Definition Casting.h:354
static bool isIndirectCall(const MachineInstr &MI)
constexpr bool isUInt< 16 >(uint64_t x)
Definition MathExtras.h:409
constexpr bool isInt(int64_t x)
Checks if an integer fits into the given bit width.
Definition MathExtras.h:364
bool isNullConstant(SDValue V)
Returns true if V is a constant integer zero.
SDValue peekThroughBitcasts(SDValue V)
Return the non-bitcasted source operand of V if it exists.
bool CC_PPC32_SVR4_ByVal(unsigned ValNo, MVT ValVT, MVT LocVT, CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags, CCState &State)
bool isAligned(Align Lhs, uint64_t SizeInBytes)
Checks that SizeInBytes is a multiple of the alignment.
Definition Alignment.h:148
bool isIntS16Immediate(SDNode *N, int16_t &Imm)
isIntS16Immediate - This method tests to see if the node is either a 32-bit or 64-bit immediate,...
bool CC_PPC32_SVR4_VarArg(unsigned ValNo, MVT ValVT, MVT LocVT, CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags, CCState &State)
constexpr bool isInt< 32 >(int64_t x)
Definition MathExtras.h:374
constexpr bool isPowerOf2_64(uint64_t Value)
Return true if the argument is a power of two > 0 (64 bit edition.)
Definition MathExtras.h:497
constexpr bool isInt< 16 >(int64_t x)
Definition MathExtras.h:371
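A sketch of the displacement checks these templates make trivial; the D-form/DS-form framing (16-bit signed offset, DS requiring the low two bits clear) reflects how such helpers are commonly used, but the helper names are hypothetical.

#include "llvm/Support/MathExtras.h"

// Sketch (assumed helpers): offset legality for two displacement encodings.
static bool fitsDForm(int64_t Offset) {
  return llvm::isInt<16>(Offset); // 16-bit signed displacement
}
static bool fitsDSForm(int64_t Offset) {
  // 16-bit signed and a multiple of 4 (low two bits must be zero).
  return llvm::isShiftedInt<14, 2>(Offset);
}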
uint32_t FloatToBits(float Float)
This function takes a float and returns the bit equivalent 32-bit integer.
Definition MathExtras.h:664
unsigned M1(unsigned Val)
Definition VE.h:372
bool isReleaseOrStronger(AtomicOrdering AO)
bool any_of(R &&range, UnaryPredicate P)
Provide wrappers to std::any_of which take ranges instead of having to pass begin/end explicitly.
Definition STLExtras.h:1505
uint64_t PowerOf2Floor(uint64_t A)
Returns the power of two which is less than or equal to the given value.
Definition MathExtras.h:696
const NoneType None
Definition None.h:23
bool RetCC_PPC_Cold(unsigned ValNo, MVT ValVT, MVT LocVT, CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags, CCState &State)
constexpr bool isPowerOf2_32(uint32_t Value)
Return true if the argument is a power of two > 0.
Definition MathExtras.h:492
bool convertToNonDenormSingle(APInt &ArgAPInt)
constexpr size_t array_lengthof(T(&)[N])
Find the length of an array.
Definition STLExtras.h:1341
unsigned countTrailingZeros(T Val, ZeroBehavior ZB=ZB_Width)
Count the number of 0's from the least significant bit to the most significant, stopping at the first 1.
Definition MathExtras.h:157
bool CC_PPC32_SVR4(unsigned ValNo, MVT ValVT, MVT LocVT, CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags, CCState &State)
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition Debug.cpp:132
ArrayRef< T > makeArrayRef(const T &OneElt)
Construct an ArrayRef from a single element.
Definition ArrayRef.h:458
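A sketch of wrapping a C array (the Vals array is a made-up example; the single-element overload shown in the signature above works the same way):

#include "llvm/ADT/ArrayRef.h"
#include <cstdio>

int main() {
  int Vals[] = {1, 2, 3};
  // Element type and length are deduced from the array.
  llvm::ArrayRef<int> AR = llvm::makeArrayRef(Vals);
  std::printf("size: %zu\n", AR.size());
  return 0;
}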
LLVM_ATTRIBUTE_NORETURN void report_fatal_error(Error Err, bool gen_crash_diag=true)
Report a serious error, calling any installed error handler.
Definition Error.cpp:140
bool RetCC_PPC(unsigned ValNo, MVT ValVT, MVT LocVT, CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags, CCState &State)
LLVM_NODISCARD std::enable_if_t< !is_simple_type< Y >::value, typename cast_retty< X, const Y >::ret_type > dyn_cast(const Y &Val)
Definition Casting.h:334
LLVM_NODISCARD bool isa(const Y &Val)
Definition Casting.h:141
raw_fd_ostream & errs()
This returns a reference to a raw_ostream for standard error.
AtomicOrdering
Atomic ordering for LLVM's memory model.
@ Mod
The access may modify the value stored in memory.
bool isIntS34Immediate(SDNode *N, int64_t &Imm)
isIntS34Immediate - This method tests whether the value of the given node can be accurately represented as a sign extension from a 34-bit value. If so, this returns true and the immediate.
@ Z
zlib style compression
@ Mul
Product of integers.
@ Add
Sum of integers.
uint64_t alignTo(uint64_t Size, Align A)
Returns a multiple of A needed to store Size bytes.
Definition Alignment.h:158
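A sketch of rounding a byte count up to an alignment boundary (sizes are illustrative):

#include "llvm/Support/Alignment.h"
#include <cstdio>

int main() {
  // 21 bytes rounded up to a 16-byte boundary is 32.
  std::printf("%llu\n",
              (unsigned long long)llvm::alignTo(21, llvm::Align(16)));
  return 0;
}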
auto count(R &&Range, const E &Element)
Wrapper function around std::count to count the number of times an element Element occurs in the given range.
Definition STLExtras.h:1581
constexpr uint64_t MinAlign(uint64_t A, uint64_t B)
A and B are either alignments or offsets.
Definition MathExtras.h:673
unsigned M0(unsigned Val)
Definition VE.h:371
std::enable_if_t<!is_simple_type< Y >::value, typename cast_retty< X, const Y >::ret_type > cast(const Y &Val)
Definition Casting.h:254
ConstantSDNode * isConstOrConstSplat(SDValue N, bool AllowUndefs=false, bool AllowTruncation=false)
Returns the SDNode if it is a constant splat BuildVector or constant int.
bool isAcquireOrStronger(AtomicOrdering AO)
constexpr bool isShiftedInt(int64_t x)
Checks if a signed integer is an N-bit number shifted left by S.
Definition MathExtras.h:380
constexpr int32_t SignExtend32(uint32_t X)
Sign-extend the number in the bottom B bits of X to a 32-bit integer.
Definition MathExtras.h:762
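A sketch combining SignExtend32 with the isShiftedInt predicate above (the inputs are illustrative):

#include "llvm/Support/MathExtras.h"
#include <cstdio>

int main() {
  // Reinterpret the low 16 bits of 0xFFFF as signed: -1.
  std::printf("%d\n", llvm::SignExtend32<16>(0xFFFFu));
  // 0x10000 is a 16-bit signed value (1) shifted left by 16 bits.
  std::printf("%d\n", (int)llvm::isShiftedInt<16, 16>(0x10000));
  return 0;
}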
MachineInstrBuilder BuildMI(MachineFunction &MF, const DebugLoc &DL, const MCInstrDesc &MCID)
Builder interface. Specify how to create the initial instruction itself.
constexpr unsigned BitWidth
Align commonAlignment(Align A, Align B)
Returns the alignment that satisfies both alignments.
Definition Alignment.h:221
constexpr int64_t SignExtend64(uint64_t x)
Sign-extend the number in the bottom B bits of X to a 64-bit integer.
Definition MathExtras.h:778
constexpr bool isShiftedUInt(uint64_t x)
Checks if an unsigned integer is an N-bit number shifted left by S.
Definition MathExtras.h:418
bool isAllOnesConstant(SDValue V)
Returns true if V is an integer constant with all bits set.
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
Definition BitVector.h:944
#define N
static const fltSemantics & IEEEsingle() LLVM_READNONE
Definition APFloat.cpp:163
static constexpr roundingMode rmNearestTiesToEven
Definition APFloat.h:190
static const fltSemantics & PPCDoubleDouble() LLVM_READNONE
Definition APFloat.cpp:178
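A sketch of using these APFloat semantics handles (the literal "3.14159" is arbitrary; PPCDoubleDouble() is the semantics behind the ppc_fp128 type):

#include "llvm/ADT/APFloat.h"
#include <cstdio>

int main() {
  llvm::APFloat F(llvm::APFloat::IEEEdouble(), "3.14159");
  bool LosesInfo = false;
  // Narrow to single precision, rounding to nearest even.
  llvm::APFloat::opStatus St = F.convert(
      llvm::APFloat::IEEEsingle(), llvm::APFloat::rmNearestTiesToEven,
      &LosesInfo);
  (void)St;
  std::printf("loses info: %d\n", (int)LosesInfo);
  return 0;
}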
This struct is a compact representation of a valid (non-zero power of two) alignment.
Definition Alignment.h:39
uint64_t value() const
This is a hole in the type system and should not be abused.
Definition Alignment.h:85
Represent subnormal handling kind for floating point instruction inputs and outputs.
Extended Value Type.
Definition ValueTypes.h:35
EVT changeVectorElementTypeToInteger() const
Return a vector with the same number of elements as this vector, but with the element type converted to an integer type with the same bitwidth.
Definition ValueTypes.h:94
TypeSize getStoreSize() const
Return the number of bytes overwritten by a store of the specified value type.
Definition ValueTypes.h:355
bool isSimple() const
Test if the given EVT is simple (as opposed to being extended).
Definition ValueTypes.h:121
static EVT getVectorVT(LLVMContext &Context, EVT VT, unsigned NumElements, bool IsScalable=false)
Returns the EVT that represents a vector NumElements in length, where each element is of type VT.
Definition ValueTypes.h:74
bool bitsGT(EVT VT) const
Return true if this has more bits than VT.
Definition ValueTypes.h:246
bool isFloatingPoint() const
Return true if this is a FP or a vector FP type.
Definition ValueTypes.h:131
TypeSize getSizeInBits() const
Return the size of the specified value type in bits.
Definition ValueTypes.h:333
uint64_t getScalarSizeInBits() const
Definition ValueTypes.h:345
MVT getSimpleVT() const
Return the SimpleValueType held in the specified simple EVT.
Definition ValueTypes.h:278
uint64_t getFixedSizeInBits() const
Return the size of the specified fixed width value type in bits.
Definition ValueTypes.h:341
std::string getEVTString() const
This function returns the value type as a string, e.g. "i32".
bool isVector() const
Return true if this is a vector value type.
Definition ValueTypes.h:146
EVT getScalarType() const
If this is a vector type, return the element type, otherwise return this.
Definition ValueTypes.h:285
Type * getTypeForEVT(LLVMContext &Context) const
This method returns an LLVM type corresponding to the specified EVT.
EVT getVectorElementType() const
Given a vector type, return the type of each element.
Definition ValueTypes.h:290
bool isExtended() const
Test if the given EVT is extended (as opposed to being simple).
Definition ValueTypes.h:126
bool isScalarInteger() const
Return true if this is an integer, but not a vector.
Definition ValueTypes.h:141
unsigned getVectorNumElements() const
Given a vector type, return the number of elements it contains.
Definition ValueTypes.h:298
EVT getHalfNumVectorElementsVT(LLVMContext &Context) const
Definition ValueTypes.h:407
bool isInteger() const
Return true if this is an integer or a vector integer type.
Definition ValueTypes.h:136
unsigned getByValSize() const
void setByValSize(unsigned S)
Align getNonZeroByValAlign() const
InputArg - This struct carries flags and type information about a single incoming (formal) argument or incoming (from the perspective of the caller) return value virtual register.
OutputArg - This struct carries flags and a value for a single outgoing (actual) argument or outgoing (from the perspective of the callee) return value virtual register.
bool isConstant() const
Returns true if we know the value of all bits.
Definition KnownBits.h:50
void resetAll()
Resets the known state of all bits.
Definition KnownBits.h:66
const APInt & getConstant() const
Returns the value when all bits have a known value.
Definition KnownBits.h:57
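A sketch of the KnownBits queries above (the bit patterns are illustrative):

#include "llvm/Support/KnownBits.h"
#include <cstdio>

int main() {
  llvm::KnownBits Known(8);          // 8-bit value, nothing known yet.
  Known.Zero = llvm::APInt(8, 0xF0); // High nibble known to be zero.
  Known.One = llvm::APInt(8, 0x05);  // Bits 0 and 2 known to be one.
  // Bits 1 and 3 are still unknown, so this is not a constant.
  std::printf("constant: %d\n", (int)Known.isConstant());
  Known.Zero = llvm::APInt(8, 0xFA); // Now Zero | One covers all bits.
  std::printf("constant: %d value: %llu\n", (int)Known.isConstant(),
              (unsigned long long)Known.getConstant().getZExtValue());
  return 0;
}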
This class contains a discriminated union of information about pointers in memory operands, relating them back to the original IR or one of the special values being referenced.
static MachinePointerInfo getStack(MachineFunction &MF, int64_t Offset, uint8_t ID=0)
Stack pointer relative access.
MachinePointerInfo getWithOffset(int64_t O) const
static MachinePointerInfo getGOT(MachineFunction &MF)
Return a MachinePointerInfo record that refers to a GOT entry.
static MachinePointerInfo getFixedStack(MachineFunction &MF, int FI, int64_t Offset=0)
Return a MachinePointerInfo record that refers to the specified FrameIndex.
This struct is a compact representation of a valid (power of two) or undefined (0) alignment.
Definition Alignment.h:119
Structure that collects some common arguments that get passed around between the functions for call lowering.
These are IR-level optimization flags that may be propagated to SDNodes.
void setNoFPExcept(bool b)
This represents a list of ValueType's that has been intern'd by a SelectionDAG.
This represents an addressing mode of: BaseGV + BaseOffs + BaseReg + Scale*ScaleReg. If BaseGV is null, there is no BaseGV.
This contains information for each constraint that we are lowering.
This structure contains all information that is necessary for lowering calls.
SmallVector< ISD::InputArg, 32 > Ins
SmallVector< ISD::OutputArg, 32 > Outs
SDValue CombineTo(SDNode *N, ArrayRef< SDValue > To, bool AddTo=true)